82 #include "llvm/IR/IntrinsicsARM.h"
119 using namespace llvm;
122 #define DEBUG_TYPE "arm-isel"
124 STATISTIC(NumTailCalls,
"Number of tail calls");
125 STATISTIC(NumMovwMovt,
"Number of GAs materialized with movw + movt");
126 STATISTIC(NumLoopByVals,
"Number of loops generated for byval arguments");
128 "Number of constants with their storage promoted into constant pools");
132 cl::desc(
"Enable / disable ARM interworking (for debugging only)"),
137 cl::desc(
"Enable / disable promotion of unnamed_addr constants into "
142 cl::desc(
"Maximum size of constant to promote into a constant pool"),
146 cl::desc(
"Maximum size of ALL constants to promote into a constant pool"),
151 cl::desc(
"Maximum interleave factor for MVE VLDn to generate."),
156 ARM::R0, ARM::R1,
ARM::R2, ARM::R3
159 void ARMTargetLowering::addTypeForNEON(
MVT VT,
MVT PromotedLdStVT) {
160 if (VT != PromotedLdStVT) {
161 setOperationAction(
ISD::LOAD, VT, Promote);
162 AddPromotedToType (
ISD::LOAD, VT, PromotedLdStVT);
165 AddPromotedToType (
ISD::STORE, VT, PromotedLdStVT);
193 setOperationAction(
ISD::SHL, VT, Custom);
194 setOperationAction(
ISD::SRA, VT, Custom);
195 setOperationAction(
ISD::SRL, VT, Custom);
199 setOperationAction(
ISD::SDIV, VT, Expand);
200 setOperationAction(
ISD::UDIV, VT, Expand);
201 setOperationAction(
ISD::FDIV, VT, Expand);
202 setOperationAction(
ISD::SREM, VT, Expand);
203 setOperationAction(
ISD::UREM, VT, Expand);
204 setOperationAction(
ISD::FREM, VT, Expand);
211 setOperationAction(Opcode, VT,
Legal);
214 setOperationAction(Opcode, VT,
Legal);
217 void ARMTargetLowering::addDRTypeForNEON(
MVT VT) {
218 addRegisterClass(VT, &ARM::DPRRegClass);
222 void ARMTargetLowering::addQRTypeForNEON(
MVT VT) {
223 addRegisterClass(VT, &ARM::DPairRegClass);
227 void ARMTargetLowering::setAllExpand(
MVT VT) {
229 setOperationAction(Opc, VT, Expand);
240 void ARMTargetLowering::addAllExtLoads(
const MVT From,
const MVT To,
247 void ARMTargetLowering::addMVEVectorTypes(
bool HasMVEFP) {
250 for (
auto VT : IntTypes) {
251 addRegisterClass(VT, &ARM::MQPRRegClass);
256 setOperationAction(
ISD::SHL, VT, Custom);
257 setOperationAction(
ISD::SRA, VT, Custom);
258 setOperationAction(
ISD::SRL, VT, Custom);
268 setOperationAction(
ISD::CTTZ, VT, Custom);
283 setOperationAction(
ISD::UDIV, VT, Expand);
284 setOperationAction(
ISD::SDIV, VT, Expand);
285 setOperationAction(
ISD::UREM, VT, Expand);
286 setOperationAction(
ISD::SREM, VT, Expand);
317 setIndexedLoadAction(
im, VT,
Legal);
318 setIndexedStoreAction(
im, VT,
Legal);
319 setIndexedMaskedLoadAction(
im, VT,
Legal);
320 setIndexedMaskedStoreAction(
im, VT,
Legal);
325 for (
auto VT : FloatTypes) {
326 addRegisterClass(VT, &ARM::MQPRRegClass);
347 setIndexedLoadAction(
im, VT,
Legal);
348 setIndexedStoreAction(
im, VT,
Legal);
349 setIndexedMaskedLoadAction(
im, VT,
Legal);
350 setIndexedMaskedStoreAction(
im, VT,
Legal);
363 setOperationAction(
ISD::FDIV, VT, Expand);
364 setOperationAction(
ISD::FREM, VT, Expand);
366 setOperationAction(
ISD::FSIN, VT, Expand);
367 setOperationAction(
ISD::FCOS, VT, Expand);
368 setOperationAction(
ISD::FPOW, VT, Expand);
369 setOperationAction(
ISD::FLOG, VT, Expand);
372 setOperationAction(
ISD::FEXP, VT, Expand);
393 for (
auto VT : LongTypes) {
394 addRegisterClass(VT, &ARM::MQPRRegClass);
430 setIndexedLoadAction(
im, VT,
Legal);
431 setIndexedStoreAction(
im, VT,
Legal);
432 setIndexedMaskedLoadAction(
im, VT,
Legal);
433 setIndexedMaskedStoreAction(
im, VT,
Legal);
439 for (
auto VT : pTypes) {
440 addRegisterClass(VT, &ARM::VCCRRegClass);
449 setOperationAction(
ISD::LOAD, VT, Custom);
495 for (
int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
503 if (Subtarget->isThumb() && Subtarget->
hasVFP2Base() &&
504 Subtarget->
hasARMOps() && !Subtarget->useSoftFloat()) {
505 static const struct {
507 const char *
const Name;
529 { RTLIB::UO_F32,
"__unordsf2vfp",
ISD::SETNE },
538 { RTLIB::UO_F64,
"__unorddf2vfp",
ISD::SETNE },
563 for (
const auto &LC : LibraryCalls) {
583 static const struct {
585 const char *
const Name;
670 for (
const auto &LC : LibraryCalls) {
680 static const struct {
682 const char *
const Name;
685 } MemOpsLibraryCalls[] = {
693 for (
const auto &LC : MemOpsLibraryCalls) {
703 static const struct {
705 const char *
const Name;
718 for (
const auto &LC : LibraryCalls) {
750 static const struct {
752 const char *
const Name;
760 for (
const auto &LC : LibraryCalls) {
771 if (!Subtarget->useSoftFloat() && !Subtarget->
isThumb1Only() &&
772 Subtarget->hasFPRegs()) {
783 if (!Subtarget->hasFP64())
787 if (Subtarget->hasFullFP16()) {
796 if (Subtarget->hasBF16()) {
799 if (!Subtarget->hasFullFP16())
806 addAllExtLoads(VT, InnerVT,
Expand);
821 if (Subtarget->hasMVEIntegerOps())
822 addMVEVectorTypes(Subtarget->hasMVEFloatOps());
825 if (Subtarget->hasLOB()) {
829 if (Subtarget->hasNEON()) {
843 if (Subtarget->hasFullFP16()) {
848 if (Subtarget->hasBF16()) {
854 if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
892 if (Subtarget->hasNEON()) {
1012 if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1020 if (Subtarget->hasMVEIntegerOps()) {
1025 if (Subtarget->hasMVEFloatOps()) {
1029 if (!Subtarget->hasFP64()) {
1075 if (Subtarget->hasFullFP16()) {
1081 if (!Subtarget->hasFP16()) {
1129 if (Subtarget->hasDSP()) {
1151 if (Subtarget->
isThumb1Only() || !Subtarget->hasV6Ops()
1152 || (Subtarget->
isThumb2() && !Subtarget->hasDSP()))
1167 if (Subtarget->hasMVEIntegerOps())
1177 if (!Subtarget->
isThumb1Only() && Subtarget->hasV6T2Ops())
1188 if (!Subtarget->hasV5TOps() || Subtarget->
isThumb1Only()) {
1197 if (Subtarget->hasPerfMon())
1201 if (!Subtarget->hasV6Ops())
1204 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1205 : Subtarget->hasDivideInARMMode();
1212 if (Subtarget->
isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1229 HasStandaloneRem =
false;
1234 const char *
const Name;
1236 } LibraryCalls[] = {
1248 for (
const auto &LC : LibraryCalls) {
1255 const char *
const Name;
1257 } LibraryCalls[] = {
1269 for (
const auto &LC : LibraryCalls) {
1313 InsertFencesForAtomic =
false;
1315 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1319 if (!Subtarget->isThumb() || !Subtarget->
isMClass())
1324 if (!Subtarget->hasAcquireRelease() ||
1327 InsertFencesForAtomic =
true;
1333 if (Subtarget->hasDataBarrier())
1334 InsertFencesForAtomic =
true;
1354 if (!InsertFencesForAtomic) {
1362 (!Subtarget->
isMClass() && Subtarget->hasV6Ops())) {
1374 }
else if ((Subtarget->
isMClass() && Subtarget->hasV8MBaselineOps()) ||
1375 Subtarget->hasForced32BitAtomics()) {
1389 if (!Subtarget->hasV6Ops()) {
1395 if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1421 if (Subtarget->hasFullFP16()) {
1431 if (Subtarget->hasFullFP16())
1446 if (!Subtarget->useSoftFloat() && Subtarget->
hasVFP2Base() &&
1460 if (!Subtarget->useSoftFloat() && !Subtarget->
isThumb1Only()) {
1468 if (!Subtarget->hasFP16()) {
1499 if (Subtarget->hasNEON()) {
1506 if (Subtarget->hasFP64()) {
1519 if (Subtarget->hasFullFP16()) {
1536 if (Subtarget->hasNEON()) {
1552 if (Subtarget->hasFullFP16()) {
1570 if (Subtarget->hasMVEIntegerOps())
1573 if (Subtarget->hasV6Ops())
1578 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
1585 if (Subtarget->useSoftFloat() || Subtarget->
isThumb1Only() ||
1610 if (Subtarget->isThumb() || Subtarget->
isThumb2())
1615 return Subtarget->useSoftFloat();
1628 std::pair<const TargetRegisterClass *, uint8_t>
1641 RRC = &ARM::DPRRegClass;
1651 RRC = &ARM::DPRRegClass;
1655 RRC = &ARM::DPRRegClass;
1659 RRC = &ARM::DPRRegClass;
1663 return std::make_pair(
RRC,
Cost);
1667 #define MAKE_CASE(V) \
1887 if ((Subtarget->hasMVEIntegerOps() &&
1890 (Subtarget->hasMVEFloatOps() &&
1905 if (Subtarget->hasNEON()) {
1907 return &ARM::QQPRRegClass;
1909 return &ARM::QQQQPRRegClass;
1911 if (Subtarget->hasMVEIntegerOps()) {
1913 return &ARM::MQQPRRegClass;
1915 return &ARM::MQQQQPRRegClass;
1924 Align &PrefAlign)
const {
1925 if (!isa<MemIntrinsic>(CI))
1943 unsigned NumVals =
N->getNumValues();
1947 for (
unsigned i = 0;
i != NumVals; ++
i) {
1948 EVT VT =
N->getValueType(
i);
1955 if (!
N->isMachineOpcode())
1979 if (
auto Const = dyn_cast<ConstantSDNode>(
Op.getOperand(1)))
1980 return Const->getZExtValue() == 16;
1987 if (
auto Const = dyn_cast<ConstantSDNode>(
Op.getOperand(1)))
1988 return Const->getZExtValue() == 16;
1995 if (
auto Const = dyn_cast<ConstantSDNode>(
Op.getOperand(1)))
1996 return Const->getZExtValue() == 16;
2065 bool isVarArg)
const {
2084 else if (Subtarget->hasFPRegs() && !Subtarget->
isThumb1Only() &&
2105 bool isVarArg)
const {
2106 return CCAssignFnForNode(
CC,
false, isVarArg);
2110 bool isVarArg)
const {
2111 return CCAssignFnForNode(
CC,
true, isVarArg);
2118 bool isVarArg)
const {
2119 switch (getEffectiveCallingConv(
CC, isVarArg)) {
2143 if (Subtarget->hasFullFP16()) {
2156 if (Subtarget->hasFullFP16()) {
2170 SDValue ARMTargetLowering::LowerCallResult(
2182 for (
unsigned i = 0;
i != RVLocs.size(); ++
i) {
2187 if (
i == 0 && isThisReturn) {
2189 "unexpected return calling convention register assignment");
2190 InVals.push_back(ThisVal);
2200 Chain =
Lo.getValue(1);
2201 InFlag =
Lo.getValue(2);
2205 Chain =
Hi.getValue(1);
2206 InFlag =
Hi.getValue(2);
2218 Chain =
Lo.getValue(1);
2219 InFlag =
Lo.getValue(2);
2222 Chain =
Hi.getValue(1);
2223 InFlag =
Hi.getValue(2);
2252 InVals.push_back(Val);
2258 std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
2260 bool IsTailCall,
int SPDiff)
const {
2282 return std::make_pair(DstAddr, DstInfo);
2287 RegsToPassVector &RegsToPass,
2295 unsigned id = Subtarget->
isLittle() ? 0 : 1;
2308 std::tie(DstAddr, DstInfo) =
2309 computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
2310 MemOpChains.push_back(
2341 bool isStructRet = (Outs.empty()) ?
false : Outs[0].
Flags.isSRet();
2342 bool isThisReturn =
false;
2343 bool isCmseNSCall =
false;
2344 bool isSibCall =
false;
2345 bool PreferIndirect =
false;
2346 bool GuardWithBTI =
false;
2350 !Subtarget->noBTIAtReturnTwice())
2355 isCmseNSCall =
true;
2367 if (isa<GlobalAddressSDNode>(Callee)) {
2371 auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2374 PreferIndirect = Subtarget->isThumb() && Subtarget->
hasMinSize() &&
2376 return isa<Instruction>(U) &&
2377 cast<Instruction>(U)->getParent() == BB;
2383 isTailCall = IsEligibleForTailCallOptimization(
2384 Callee, CallConv, isVarArg, isStructRet,
2400 "site marked musttail");
2408 unsigned NumBytes = CCInfo.getNextStackOffset();
2417 if (isTailCall && !isSibCall) {
2429 SPDiff = NumReusableBytes - NumBytes;
2433 if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (
unsigned)-SPDiff)
2449 RegsToPassVector RegsToPass;
2457 bool AfterFormalArgLoads =
false;
2461 for (
unsigned i = 0, realArgIdx = 0,
e = ArgLocs.size();
2463 ++
i, ++realArgIdx) {
2467 bool isByVal =
Flags.isByVal();
2487 if (isTailCall && VA.
isMemLoc() && !AfterFormalArgLoads) {
2489 AfterFormalArgLoads =
true;
2501 auto ArgVT = Outs[realArgIdx].ArgVT;
2502 if (isCmseNSCall && (ArgVT ==
MVT::f16)) {
2520 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++
i],
2521 StackPtr, MemOpChains, isTailCall, SPDiff);
2525 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++
i],
2526 StackPtr, MemOpChains, isTailCall, SPDiff);
2531 std::tie(DstAddr, DstInfo) =
2532 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2533 MemOpChains.push_back(DAG.
getStore(Chain, dl, Op1, DstAddr, DstInfo));
2536 PassF64ArgInRegs(dl, DAG, Chain,
Arg, RegsToPass, VA, ArgLocs[++
i],
2537 StackPtr, MemOpChains, isTailCall, SPDiff);
2539 if (realArgIdx == 0 &&
Flags.isReturned() && !
Flags.isSwiftSelf() &&
2542 "unexpected calling convention register assignment");
2544 "unexpected use of 'returned'");
2545 isThisReturn =
true;
2550 RegsToPass.push_back(std::make_pair(VA.
getLocReg(),
Arg));
2551 }
else if (isByVal) {
2553 unsigned offset = 0;
2557 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2558 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2560 if (CurByValIdx < ByValArgsCount) {
2562 unsigned RegBegin, RegEnd;
2563 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2568 for (
i = 0,
j = RegBegin;
j < RegEnd;
i++,
j++) {
2574 MemOpChains.push_back(
Load.getValue(1));
2575 RegsToPass.push_back(std::make_pair(
j,
Load));
2580 offset = RegEnd - RegBegin;
2582 CCInfo.nextInRegsParam();
2585 if (
Flags.getByValSize() > 4*offset) {
2589 std::tie(Dst, DstInfo) =
2590 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2599 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2607 std::tie(DstAddr, DstInfo) =
2608 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2611 MemOpChains.push_back(
Store);
2615 if (!MemOpChains.empty())
2621 for (
unsigned i = 0,
e = RegsToPass.size();
i !=
e; ++
i) {
2623 RegsToPass[
i].second, InFlag);
2630 bool isDirect =
false;
2636 GVal =
G->getGlobal();
2640 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->
isMClass());
2641 bool isLocalARMFunc =
false;
2644 if (Subtarget->genLongCalls()) {
2646 "long-calls codegen is not position independent!");
2650 if (isa<GlobalAddressSDNode>(Callee)) {
2654 if (Subtarget->genExecuteOnly()) {
2656 "long-calls with execute-only requires movt and movw!");
2674 const char *Sym =
S->getSymbol();
2679 if (Subtarget->genExecuteOnly()) {
2681 "long-calls with execute-only requires movt and movw!");
2699 }
else if (isa<GlobalAddressSDNode>(Callee)) {
2700 if (!PreferIndirect) {
2705 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !
ARMInterworking);
2707 if (isStub && Subtarget->
isThumb1Only() && !Subtarget->hasV5TOps()) {
2719 "Windows is the only supported COFF target");
2723 else if (!
TM.shouldAssumeDSOLocal(*GVal->
getParent(), GVal))
2739 const char *Sym =
S->getSymbol();
2740 if (isARMFunc && Subtarget->
isThumb1Only() && !Subtarget->hasV5TOps()) {
2744 ARMPCLabelIndex, 4);
2758 assert(!isARMFunc && !isDirect &&
2759 "Cannot handle call to ARM function or direct call");
2762 "call to non-secure function would "
2763 "require passing arguments on stack",
2770 "call to non-secure function would return value through pointer",
2778 if (Subtarget->isThumb()) {
2781 else if (isCmseNSCall)
2783 else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2788 if (!isDirect && !Subtarget->hasV5TOps())
2790 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2803 if (isTailCall && !isSibCall) {
2808 std::vector<SDValue> Ops;
2809 Ops.push_back(Chain);
2810 Ops.push_back(Callee);
2818 for (
unsigned i = 0,
e = RegsToPass.size();
i !=
e; ++
i)
2820 RegsToPass[
i].second.getValueType()));
2832 isThisReturn =
false;
2838 assert(
Mask &&
"Missing call preserved mask for calling convention");
2842 Ops.push_back(InFlag);
2853 Chain = DAG.
getNode(CallOpc, dl, NodeTys, Ops);
2865 Chain = DAG.
getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InFlag, dl);
2871 return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
Ins, dl, DAG,
2872 InVals, isThisReturn,
2873 isThisReturn ? OutVals[0] :
SDValue());
2880 void ARMTargetLowering::HandleByVal(
CCState *State,
unsigned &Size,
2881 Align Alignment)
const {
2889 unsigned AlignInRegs = Alignment.
value() / 4;
2890 unsigned Waste = (
ARM::R4 -
Reg) % AlignInRegs;
2891 for (
unsigned i = 0;
i < Waste; ++
i)
2904 if (NSAAOffset != 0 && Size > Excess) {
2916 unsigned ByValRegBegin =
Reg;
2917 unsigned ByValRegEnd = std::min<unsigned>(
Reg + Size / 4,
ARM::R4);
2921 for (
unsigned i =
Reg + 1;
i != ByValRegEnd; ++
i)
2927 Size = std::max<int>(Size - Excess, 0);
2937 unsigned Bytes =
Arg.getValueSizeInBits() / 8;
2940 Register VR = cast<RegisterSDNode>(
Arg.getOperand(1))->getReg();
2946 if (!Flags.isByVal()) {
2953 if (Flags.isByVal())
2977 bool ARMTargetLowering::IsEligibleForTailCallOptimization(
2979 bool isCalleeStructRet,
bool isCallerStructRet,
2983 const bool isIndirect)
const {
2996 if (Outs.size() >= 4 &&
2997 (!isa<GlobalAddressSDNode>(
Callee.getNode()) || isIndirect)) {
3015 return CalleeCC == CallerCC;
3019 if (isCalleeStructRet || isCallerStructRet)
3033 (!
TT.isOSWindows() ||
TT.isOSBinFormatELF() ||
TT.isOSBinFormatMachO()))
3040 getEffectiveCallingConv(CalleeCC, isVarArg),
3041 getEffectiveCallingConv(CallerCC, CallerF.
isVarArg()), MF,
C,
Ins,
3048 if (CalleeCC != CallerCC) {
3063 if (!Outs.empty()) {
3067 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs,
C);
3069 if (CCInfo.getNextStackOffset()) {
3075 for (
unsigned i = 0, realArgIdx = 0,
e = ArgLocs.size();
3077 ++
i, ++realArgIdx) {
3091 if (!ArgLocs[++
i].isRegLoc())
3094 if (!ArgLocs[++
i].isRegLoc())
3096 if (!ArgLocs[++
i].isRegLoc())
3130 StringRef IntKind =
F.getFnAttribute(
"interrupt").getValueAsString();
3143 if (IntKind ==
"" || IntKind ==
"IRQ" || IntKind ==
"FIQ" ||
3146 else if (IntKind ==
"SWI" || IntKind ==
"UNDEF")
3150 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
3152 RetOps.
insert(RetOps.begin() + 1,
3176 RetOps.push_back(Chain);
3177 bool isLittleEndian = Subtarget->
isLittle();
3189 "secure entry function would return value through pointer",
3195 for (
unsigned i = 0, realRVLocIdx = 0;
3197 ++
i, ++realRVLocIdx) {
3202 bool ReturnF16 =
false;
3237 auto RetVT = Outs[realRVLocIdx].ArgVT;
3315 RetOps.push_back(
Flag);
3335 bool ARMTargetLowering::isUsedByReturnOnly(
SDNode *
N,
SDValue &Chain)
const {
3336 if (
N->getNumValues() != 1)
3338 if (!
N->hasNUsesOfValue(1, 0))
3348 TCChain =
Copy->getOperand(0);
3370 if (U->getOperand(U->getNumOperands() - 1).getValueType() ==
MVT::Glue)
3378 if (!
Copy->hasOneUse())
3387 TCChain =
Copy->getOperand(0);
3392 bool HasRet =
false;
3407 bool ARMTargetLowering::mayBeEmittedAsTailCall(
const CallInst *CI)
const {
3425 &&
"LowerWRITE_REGISTER called for non-i64 type argument.");
3431 SDValue Ops[] = {
Op->getOperand(0),
Op->getOperand(1), Lo, Hi };
3443 EVT PtrVT =
Op.getValueType();
3453 if (Subtarget->genExecuteOnly()) {
3455 auto T =
const_cast<Type*
>(
CP->getType());
3456 auto C =
const_cast<Constant*
>(
CP->getConstVal());
3467 return LowerGlobalAddress(GA, DAG);
3472 Align CPAlign =
CP->getAlign();
3475 if (
CP->isMachineConstantPoolEntry())
3491 unsigned ARMPCLabelIndex = 0;
3494 const BlockAddress *BA = cast<BlockAddressSDNode>(
Op)->getBlockAddress();
3497 if (!IsPositionIndependent) {
3500 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3511 if (!IsPositionIndependent)
3542 ARMTargetLowering::LowerGlobalTLSAddressDarwin(
SDValue Op,
3545 "This function expects a Darwin target");
3550 SDValue DescAddr = LowerGlobalAddressDarwin(
Op, DAG);
3586 ARMTargetLowering::LowerGlobalTLSAddressWindows(
SDValue Op,
3630 const auto *GA = cast<GlobalAddressSDNode>(
Op);
3647 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3669 Args.push_back(Entry);
3677 std::pair<SDValue, SDValue> CallResult =
LowerCallTo(CLI);
3678 return CallResult.first;
3700 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3708 PtrVT, dl, Chain,
Offset,
3710 Chain =
Offset.getValue(1);
3716 PtrVT, dl, Chain,
Offset,
3726 PtrVT, dl, Chain,
Offset,
3742 return LowerGlobalTLSAddressDarwin(
Op, DAG);
3745 return LowerGlobalTLSAddressWindows(
Op, DAG);
3754 return LowerToTLSGeneralDynamicModel(GA, DAG);
3757 return LowerToTLSExecModels(GA, DAG,
model);
3766 while (!Worklist.empty()) {
3768 if (isa<ConstantExpr>(U)) {
3773 auto *
I = dyn_cast<Instruction>(U);
3774 if (!
I ||
I->getParent()->getParent() !=
F)
3802 auto *GVar = dyn_cast<GlobalVariable>(GV);
3803 if (!GVar || !GVar->hasInitializer() ||
3804 !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3805 !GVar->hasLocalLinkage())
3810 auto *
Init = GVar->getInitializer();
3812 Init->needsDynamicRelocation())
3821 auto *CDAInit = dyn_cast<ConstantDataArray>(
Init);
3824 unsigned RequiredPadding = 4 - (Size % 4);
3825 bool PaddingPossible =
3826 RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3831 unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3855 if (RequiredPadding != 4) {
3859 std::copy(
S.bytes_begin(),
S.bytes_end(), V.begin());
3860 while (RequiredPadding--)
3872 ++NumConstpoolPromoted;
3877 if (
const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3878 if (!(GV = GA->getAliaseeObject()))
3880 if (
const auto *V = dyn_cast<GlobalVariable>(GV))
3881 return V->isConstant();
3882 return isa<Function>(GV);
3890 return LowerGlobalAddressWindows(
Op, DAG);
3892 return LowerGlobalAddressELF(
Op, DAG);
3894 return LowerGlobalAddressDarwin(
Op, DAG);
3902 const GlobalValue *GV = cast<GlobalAddressSDNode>(
Op)->getGlobal();
3907 if (
TM.shouldAssumeDSOLocal(*GV->
getParent(), GV) && !Subtarget->genExecuteOnly())
3912 bool UseGOT_PREL = !
TM.shouldAssumeDSOLocal(*GV->
getParent(), GV);
3921 }
else if (Subtarget->
isROPI() && IsRO) {
3926 }
else if (Subtarget->
isRWPI() && !IsRO) {
3967 "ROPI/RWPI not currently supported for Darwin");
3970 const GlobalValue *GV = cast<GlobalAddressSDNode>(
Op)->getGlobal();
3993 "Windows on ARM expects to use movw/movt");
3995 "ROPI/RWPI not currently supported for Windows");
3998 const GlobalValue *GV = cast<GlobalAddressSDNode>(
Op)->getGlobal();
4002 else if (!
TM.shouldAssumeDSOLocal(*GV->
getParent(), GV))
4027 Op.getOperand(1), Val);
4044 SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
4047 cast<ConstantSDNode>(
4053 case Intrinsic::arm_gnu_eabi_mcount: {
4062 assert(
Mask &&
"Missing call preserved mask for calling convention");
4071 if (Subtarget->isThumb())
4074 ARM::tBL_PUSHLR, dl, ResultTys,
4075 {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
4076 DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
4080 {ReturnAddress, Callee, RegisterMask, Chain}),
4089 unsigned IntNo = cast<ConstantSDNode>(
Op.getOperand(0))->getZExtValue();
4093 case Intrinsic::thread_pointer: {
4097 case Intrinsic::arm_cls: {
4098 const SDValue &Operand =
Op.getOperand(1);
4099 const EVT VTy =
Op.getValueType();
4110 case Intrinsic::arm_cls64: {
4113 const SDValue &Operand =
Op.getOperand(1);
4114 const EVT VTy =
Op.getValueType();
4140 case Intrinsic::eh_sjlj_lsda: {
4147 unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
4157 if (IsPositionIndependent) {
4163 case Intrinsic::arm_neon_vabs:
4166 case Intrinsic::arm_neon_vmulls:
4167 case Intrinsic::arm_neon_vmullu: {
4168 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
4171 Op.getOperand(1),
Op.getOperand(2));
4173 case Intrinsic::arm_neon_vminnm:
4174 case Intrinsic::arm_neon_vmaxnm: {
4175 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
4178 Op.getOperand(1),
Op.getOperand(2));
4180 case Intrinsic::arm_neon_vminu:
4181 case Intrinsic::arm_neon_vmaxu: {
4182 if (
Op.getValueType().isFloatingPoint())
4184 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
4187 Op.getOperand(1),
Op.getOperand(2));
4189 case Intrinsic::arm_neon_vmins:
4190 case Intrinsic::arm_neon_vmaxs: {
4192 if (!
Op.getValueType().isFloatingPoint()) {
4193 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4196 Op.getOperand(1),
Op.getOperand(2));
4198 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4201 Op.getOperand(1),
Op.getOperand(2));
4203 case Intrinsic::arm_neon_vtbl1:
4205 Op.getOperand(1),
Op.getOperand(2));
4206 case Intrinsic::arm_neon_vtbl2:
4208 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
4209 case Intrinsic::arm_mve_pred_i2v:
4210 case Intrinsic::arm_mve_pred_v2i:
4213 case Intrinsic::arm_mve_vreinterpretq:
4216 case Intrinsic::arm_mve_lsll:
4218 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
4219 case Intrinsic::arm_mve_asrl:
4221 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
4233 if (!Subtarget->hasDataBarrier()) {
4237 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
4238 "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
4249 }
else if (Subtarget->preferISHSTBarriers() &&
4266 (!Subtarget->
isThumb1Only() && Subtarget->hasV5TEOps())))
4268 return Op.getOperand(0);
4271 unsigned isRead = ~cast<ConstantSDNode>(
Op.getOperand(2))->getZExtValue() & 1;
4273 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
4275 return Op.getOperand(0);
4277 unsigned isData = cast<ConstantSDNode>(
Op.getOperand(4))->getZExtValue();
4278 if (Subtarget->isThumb()) {
4280 isRead = ~isRead & 1;
4281 isData = ~isData & 1;
4298 const Value *SV = cast<SrcValueSDNode>(
Op.getOperand(2))->getValue();
4299 return DAG.
getStore(
Op.getOperand(0), dl, FR,
Op.getOperand(1),
4307 const SDLoc &dl)
const {
4313 RC = &ARM::tGPRRegClass;
4315 RC = &ARM::GPRRegClass;
4350 const Value *OrigArg,
4351 unsigned InRegsParamRecordIdx,
4352 int ArgOffset,
unsigned ArgSize)
const {
4367 unsigned RBegin, REnd;
4377 ArgOffset = -4 * (
ARM::R4 - RBegin);
4387 for (
unsigned Reg = RBegin,
i = 0;
Reg < REnd; ++
Reg, ++
i) {
4392 MemOps.push_back(
Store);
4396 if (!MemOps.empty())
4405 unsigned TotalArgRegsSaveSize,
4406 bool ForceMutable)
const {
4415 int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain,
nullptr,
4418 std::max(4U, TotalArgRegsSaveSize));
4422 bool ARMTargetLowering::splitValueIntoRegisterParts(
4424 unsigned NumParts,
MVT PartVT, std::optional<CallingConv::ID>
CC)
const {
4425 bool IsABIRegCopy =
CC.has_value();
4440 SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
4442 MVT PartVT,
EVT ValueVT, std::optional<CallingConv::ID>
CC)
const {
4443 bool IsABIRegCopy =
CC.has_value();
4458 SDValue ARMTargetLowering::LowerFormalArguments(
4476 unsigned CurArgIdx = 0;
4488 unsigned ArgRegBegin =
ARM::R4;
4489 for (
unsigned i = 0,
e = ArgLocs.size();
i !=
e; ++
i) {
4496 if (!
Flags.isByVal())
4500 unsigned RBegin, REnd;
4502 ArgRegBegin =
std::min(ArgRegBegin, RBegin);
4508 int lastInsIndex = -1;
4515 unsigned TotalArgRegsSaveSize = 4 * (
ARM::R4 - ArgRegBegin);
4519 for (
unsigned i = 0,
e = ArgLocs.size();
i !=
e; ++
i) {
4522 std::advance(CurOrigArg,
4523 Ins[VA.
getValNo()].getOrigArgIndex() - CurArgIdx);
4534 GetF64FormalArgument(VA, ArgLocs[++
i], Chain, DAG, dl);
4544 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++
i], Chain, DAG, dl);
4552 ArgValue = GetF64FormalArgument(VA, ArgLocs[++
i], Chain, DAG, dl);
4557 RC = &ARM::HPRRegClass;
4559 RC = &ARM::SPRRegClass;
4562 RC = &ARM::DPRRegClass;
4565 RC = &ARM::QPRRegClass;
4568 : &ARM::GPRRegClass;
4611 InVals.push_back(ArgValue);
4621 if (
index != lastInsIndex)
4629 if (
Flags.isByVal()) {
4631 "Byval arguments cannot be implicit");
4635 CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
4650 lastInsIndex =
index;
4658 TotalArgRegsSaveSize);
4662 "secure entry function must not be variadic", dl.
getDebugLoc());
4673 StackArgSize =
alignTo(StackArgSize,
DL.getStackAlignment());
4682 "secure entry function requires arguments on stack", dl.
getDebugLoc());
4692 return CFP->getValueAPF().isPosZero();
4696 SDValue WrapperOp =
Op.getOperand(1).getOperand(0);
4698 if (
const ConstantFP *CFP = dyn_cast<ConstantFP>(
CP->getConstVal()))
4699 return CFP->getValueAPF().isPosZero();
4717 const SDLoc &dl)
const {
4719 unsigned C = RHSC->getZExtValue();
4787 unsigned Mask = cast<ConstantSDNode>(
LHS.getOperand(1))->getZExtValue();
4788 auto *RHSC = cast<ConstantSDNode>(
RHS.getNode());
4792 if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) {
4807 isa<ConstantSDNode>(
RHS) &&
4808 cast<ConstantSDNode>(
RHS)->getZExtValue() == 0x80000000U &&
4810 cast<ConstantSDNode>(
LHS.getOperand(1))->getZExtValue() < 31) {
4812 cast<ConstantSDNode>(
LHS.getOperand(1))->getZExtValue() + 1;
4858 bool Signaling)
const {
4874 unsigned Opc =
Cmp.getOpcode();
4881 Opc =
Cmp.getOpcode();
4895 std::pair<SDValue, SDValue>
4910 switch (
Op.getOpcode()) {
4962 return std::make_pair(
Value, OverflowCmp);
4973 std::tie(
Value, OverflowCmp) = getARMXALUOOp(
Op, DAG, ARMcc);
4979 EVT VT =
Op.getValueType();
4982 ARMcc, CCR, OverflowCmp);
5022 EVT VT =
Op.getValueType();
5026 switch (
Op.getOpcode()) {
5051 EVT VT =
Op.getValueType();
5052 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
5062 switch (
Op->getOpcode()) {
5078 switch (
Op->getOpcode()) {
5108 unsigned Opc =
Cond.getOpcode();
5110 if (
Cond.getResNo() == 1 &&
5118 std::tie(
Value, OverflowCmp) = getARMXALUOOp(
Cond, DAG, ARMcc);
5120 EVT VT =
Op.getValueType();
5122 return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
5133 dyn_cast<ConstantSDNode>(
Cond.getOperand(0));
5135 dyn_cast<ConstantSDNode>(
Cond.getOperand(1));
5137 if (CMOVTrue && CMOVFalse) {
5143 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
5145 False = SelectFalse;
5146 }
else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
5152 EVT VT =
Op.getValueType();
5157 return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
5173 bool &swpCmpOps,
bool &swpVselOps) {
5201 swpCmpOps = !swpCmpOps;
5202 swpVselOps = !swpVselOps;
5225 if (!Subtarget->hasFP64() && VT ==
MVT::f64) {
5239 ARMcc, CCR, duplicateCmp(Cmp, DAG));
5289 EVT VT =
Op.getValueType();
5296 const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
5311 if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 ||
5319 if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2))
5322 int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue();
5323 int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue();
5324 int64_t PosVal =
std::max(Val1, Val2);
5325 int64_t NegVal =
std::min(Val1, Val2);
5377 if (*K != KTmp || V != VTmp)
5388 bool ARMTargetLowering::isUnsupportedFloatingType(
EVT VT)
const {
5392 return !Subtarget->hasFP64();
5394 return !Subtarget->hasFullFP16();
5399 EVT VT =
Op.getValueType();
5403 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->
isThumb2())
5435 if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
5439 unsigned Opcode = 0;
5441 if (TVal == ~FVal) {
5443 }
else if (TVal == ~FVal + 1) {
5445 }
else if (TVal + 1 == FVal) {
5447 }
else if (TVal == FVal + 1) {
5483 if (isUnsupportedFloatingType(
LHS.getValueType())) {
5489 if (!
RHS.getNode()) {
5521 if (cast<ConstantSDNode>(ARMcc)->getZExtValue() ==
ARMCC::PL)
5538 bool swpCmpOps =
false;
5539 bool swpVselOps =
false;
5559 Result = getCMOV(dl, VT, Result,
TrueVal, ARMcc2, CCR, Cmp2, DAG);
5569 if (!
N->hasOneUse())
5572 if (!
N->getNumValues())
5574 EVT VT =
Op.getValueType();
5575 if (VT !=
MVT::f32 && !Subtarget->isFPBrccSlow())
5593 Ld->getPointerInfo(), Ld->getAlign(),
5594 Ld->getMemOperand()->getFlags());
5613 Ld->getAlign(), Ld->getMemOperand()->getFlags());
5615 EVT PtrType =
Ptr.getValueType();
5619 Ld->getPointerInfo().getWithOffset(4),
5621 Ld->getMemOperand()->getFlags());
5639 bool LHSSeenZero =
false;
5641 bool RHSSeenZero =
false;
5643 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
5662 Chain, Dest, ARMcc, CCR, Cmp);
5674 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
5689 unsigned Opc =
Cond.getOpcode();
5692 if (
Cond.getResNo() == 1 &&
5702 std::tie(
Value, OverflowCmp) = getARMXALUOOp(
Cond, DAG, ARMcc);
5726 if (isUnsupportedFloatingType(
LHS.getValueType())) {
5732 if (!
RHS.getNode()) {
5740 unsigned Opc =
LHS.getOpcode();
5754 std::tie(
Value, OverflowCmp) = getARMXALUOOp(
LHS.getValue(0), DAG, ARMcc);
5774 Chain, Dest, ARMcc, CCR, Cmp);
5780 if (
SDValue Result = OptimizeVFPBrcond(
Op, DAG))
5791 SDValue Ops[] = { Chain, Dest, ARMcc, CCR,
Cmp };
5813 if (Subtarget->
isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
5819 Addr,
Op.getOperand(2), JTI);
5825 Chain =
Addr.getValue(1);
5832 Chain =
Addr.getValue(1);
5838 EVT VT =
Op.getValueType();
5841 if (
Op.getValueType().getVectorElementType() ==
MVT::i32) {
5842 if (
Op.getOperand(0).getValueType().getVectorElementType() ==
MVT::f32)
5850 const EVT OpTy =
Op.getOperand(0).getValueType();
5863 Op = DAG.
getNode(
Op.getOpcode(), dl, NewTy,
Op.getOperand(0));
5868 EVT VT =
Op.getValueType();
5872 bool IsStrict =
Op->isStrictFPOpcode();
5873 SDValue SrcVal =
Op.getOperand(IsStrict ? 1 : 0);
5875 if (isUnsupportedFloatingType(SrcVal.
getValueType())) {
5885 MakeLibCallOptions CallOptions;
5888 std::tie(Result, Chain) =
makeLibCall(DAG, LC,
Op.getValueType(), SrcVal,
5889 CallOptions, Loc, Chain);
5899 Loc,
Op.getValueType(), SrcVal);
5908 EVT VT =
Op.getValueType();
5909 EVT ToVT = cast<VTSDNode>(
Op.getOperand(1))->getVT();
5910 EVT FromVT =
Op.getOperand(0).getValueType();
5915 Subtarget->hasFP64())
5918 Subtarget->hasFullFP16())
5921 Subtarget->hasMVEFloatOps())
5924 Subtarget->hasMVEFloatOps())
5944 EVT VT =
Op.getValueType();
5947 if (
Op.getOperand(0).getValueType().getVectorElementType() ==
MVT::i32) {
5955 "Invalid type for custom lowering!");
5971 switch (
Op.getOpcode()) {
5983 Op = DAG.
getNode(CastOpc, dl, DestVecType,
Op.getOperand(0));
5988 EVT VT =
Op.getValueType();
5991 if (isUnsupportedFloatingType(VT)) {
5999 MakeLibCallOptions CallOptions;
6001 CallOptions,
SDLoc(
Op)).first;
6012 EVT VT =
Op.getValueType();
6016 bool UseNEON = !InGPR && Subtarget->hasNEON();
6097 EVT VT =
Op.getValueType();
6099 unsigned Depth = cast<ConstantSDNode>(
Op.getOperand(0))->getZExtValue();
6101 SDValue FrameAddr = LowerFRAMEADDR(
Op, DAG);
6120 EVT VT =
Op.getValueType();
6122 unsigned Depth = cast<ConstantSDNode>(
Op.getOperand(0))->getZExtValue();
6136 .
Case(
"sp", ARM::SP)
6152 &&
"ExpandREAD_REGISTER called for non-i64 type result.");
6161 Results.push_back(Read.getOperand(0));
6195 const APInt &APIntIndex =
Index->getAPIntValue();
6197 NewIndex *= APIntIndex;
6227 EVT SrcVT =
Op.getValueType();
6228 EVT DstVT =
N->getValueType(0);
6296 assert(
Op.getNumOperands() == 3 &&
"Not a double-shift!");
6297 EVT VT =
Op.getValueType();
6316 SDValue LoBigShift = DAG.
getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
6324 ? DAG.
getNode(Opc, dl, VT, ShOpHi,
6340 assert(
Op.getNumOperands() == 3 &&
"Not a double-shift!");
6341 EVT VT =
Op.getValueType();
6443 EVT VT =
N->getValueType(0);
6489 if (!
ST->hasV6T2Ops())
6498 EVT VT =
N->getValueType(0);
6501 assert(
ST->hasNEON() &&
"Custom ctpop lowering requires NEON.");
6504 "Unexpected type for custom ctpop lowering");
6512 unsigned EltSize = 8;
6516 Ops.push_back(DAG.
getConstant(Intrinsic::arm_neon_vpaddlu,
DL,
6535 Op =
Op.getOperand(0);
6537 APInt SplatBits, SplatUndef;
6538 unsigned SplatBitSize;
6541 !BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6543 SplatBitSize > ElementBits)
6554 assert(VT.
isVector() &&
"vector shift count is not a vector type");
6558 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
6569 assert(VT.
isVector() &&
"vector shift count is not a vector type");
6574 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
6575 if (Cnt >= -(isNarrow ? ElementBits / 2 : ElementBits) && Cnt <= -1) {
6584 EVT VT =
N->getValueType(0);
6606 "unexpected vector shift opcode");
6608 if (
isVShiftRImm(
N->getOperand(1), VT,
false,
false, Cnt)) {
6609 unsigned VShiftOpc =
6611 return DAG.
getNode(VShiftOpc, dl, VT,
N->getOperand(0),
6617 EVT ShiftVT =
N->getOperand(1).getValueType();
6620 unsigned VShiftOpc =
6622 return DAG.
getNode(VShiftOpc, dl, VT,
N->getOperand(0), NegatedCount);
6627 EVT VT =
N->getValueType(0);
6636 "Unknown shift to lower!");
6638 unsigned ShOpc =
N->getOpcode();
6639 if (
ST->hasMVEIntegerOps()) {
6677 Hi =
SDValue(Lo.getNode(), 1);
6686 if (
ST->isThumb1Only())
6709 bool Invert =
false;
6716 EVT VT =
Op.getValueType();
6725 "No hardware support for integer vector comparison!");
6727 if (
Op.getValueType().getVectorElementType() !=
MVT::i1)
6752 Merged = DAG.
getNOT(dl, Merged, CmpVT);
6762 switch (SetCCOpcode) {
6766 if (
ST->hasMVEFloatOps()) {
6769 Invert =
true; [[fallthrough]];
6774 case ISD::SETLT: Swap =
true; [[fallthrough]];
6778 case ISD::SETLE: Swap =
true; [[fallthrough]];
6794 Result = DAG.
getNOT(dl, Result, VT);
6797 case ISD::SETUO: Invert =
true; [[fallthrough]];
6806 Result = DAG.
getNOT(dl, Result, VT);
6812 switch (SetCCOpcode) {
6815 if (
ST->hasMVEIntegerOps()) {
6818 Invert =
true; [[fallthrough]];
6821 case ISD::SETLT: Swap =
true; [[fallthrough]];
6823 case ISD::SETLE: Swap =
true; [[fallthrough]];
6848 Result = DAG.
getNOT(dl, Result, VT);
6882 Result = DAG.
getNOT(dl, Result, VT);
6894 assert(
LHS.getSimpleValueType().isInteger() &&
"SETCCCARRY is integer only.");
6924 unsigned OpCmode,
Imm;
6935 switch (SplatBitSize) {
6940 assert((SplatBits & ~0xff) == 0 &&
"one byte splat value is too big");
6949 if ((SplatBits & ~0xff) == 0) {
6955 if ((SplatBits & ~0xff00) == 0) {
6958 Imm = SplatBits >> 8;
6969 if ((SplatBits & ~0xff) == 0) {
6975 if ((SplatBits & ~0xff00) == 0) {
6978 Imm = SplatBits >> 8;
6981 if ((SplatBits & ~0xff0000) == 0) {
6984 Imm = SplatBits >> 16;
6987 if ((SplatBits & ~0xff000000) == 0) {
6990 Imm = SplatBits >> 24;
6997 if ((SplatBits & ~0xffff) == 0 &&
6998 ((SplatBits | SplatUndef) & 0xff) == 0xff) {
7001 Imm = SplatBits >> 8;
7009 if ((SplatBits & ~0xffffff) == 0 &&
7010 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
7013 Imm = SplatBits >> 16;
7031 for (
int ByteNum = 0; ByteNum < 8; ++ByteNum) {
7032 if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
7034 }
else if ((SplatBits & BitMask) != 0) {
7044 unsigned Mask = (1 << BytesPerElem) - 1;
7045 unsigned NumElems = 8 / BytesPerElem;
7046 unsigned NewImm = 0;
7047 for (
unsigned ElemNum = 0; ElemNum < NumElems; ++ElemNum) {
7048 unsigned Elem = ((
Imm >> ElemNum * BytesPerElem) &
Mask);
7049 NewImm |= Elem << (NumElems - ElemNum - 1) * BytesPerElem;
7070 EVT VT =
Op.getValueType();
7077 if (
ST->genExecuteOnly()) {
7099 if (!
ST->hasVFP3Base())
7104 if (IsDouble && !Subtarget->hasFP64())
7111 if (IsDouble || !
ST->useNEONForSinglePrecisionFP()) {
7129 if (!
ST->hasNEON() || (!IsDouble && !
ST->useNEONForSinglePrecisionFP()))
7138 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
7192 unsigned ExpectedElt =
Imm;
7193 for (
unsigned i = 1;
i < NumElts; ++
i) {
7197 if (ExpectedElt == NumElts)
7200 if (
M[
i] < 0)
continue;
7201 if (ExpectedElt !=
static_cast<unsigned>(
M[
i]))
7209 bool &ReverseVEXT,
unsigned &
Imm) {
7211 ReverseVEXT =
false;
7222 unsigned ExpectedElt =
Imm;
7223 for (
unsigned i = 1;
i < NumElts; ++
i) {
7227 if (ExpectedElt == NumElts * 2) {
7232 if (
M[
i] < 0)
continue;
7233 if (ExpectedElt !=
static_cast<unsigned>(
M[
i]))
7253 if (
Mask.size() == Elements * 2)
7254 return Index / Elements;
7285 if (
M.size() != NumElts &&
M.size() != NumElts*2)
7293 for (
unsigned i = 0;
i <
M.size();
i += NumElts) {
7295 for (
unsigned j = 0;
j < NumElts;
j += 2) {
7296 if ((
M[
i+
j] >= 0 && (
unsigned)
M[
i+
j] !=
j + WhichResult) ||
7297 (
M[
i+
j+1] >= 0 && (
unsigned)
M[
i+
j+1] !=
j + NumElts + WhichResult))
7302 if (
M.size() == NumElts*2)
7317 if (
M.size() != NumElts &&
M.size() != NumElts*2)
7320 for (
unsigned i = 0;
i <
M.size();
i += NumElts) {
7322 for (
unsigned j = 0;
j < NumElts;
j += 2) {
7323 if ((
M[
i+
j] >= 0 && (
unsigned)
M[
i+
j] !=
j + WhichResult) ||
7324 (
M[
i+
j+1] >= 0 && (
unsigned)
M[
i+
j+1] !=
j + WhichResult))
7329 if (
M.size() == NumElts*2)
7349 if (
M.size() != NumElts &&
M.size() != NumElts*2)
7352 for (
unsigned i = 0;
i <
M.size();
i += NumElts) {
7354 for (
unsigned j = 0;
j < NumElts; ++
j) {
7355 if (
M[
i+
j] >= 0 && (
unsigned)
M[
i+
j] != 2 *
j + WhichResult)
7360 if (
M.size() == NumElts*2)
7379 if (
M.size() != NumElts &&
M.size() != NumElts*2)
7382 unsigned Half = NumElts / 2;
7383 for (
unsigned i = 0;
i <
M.size();
i += NumElts) {
7385 for (
unsigned j = 0;
j < NumElts;
j += Half) {
7386 unsigned Idx = WhichResult;
7387 for (
unsigned k = 0; k < Half; ++k) {
7388 int MIdx =
M[
i +
j + k];
7389 if (MIdx >= 0 && (
unsigned) MIdx != Idx)
7396 if (
M.size() == NumElts*2)
7420 if (
M.size() != NumElts &&
M.size() != NumElts*2)
7423 for (
unsigned i = 0;
i <
M.size();
i += NumElts) {
7425 unsigned Idx = WhichResult * NumElts / 2;
7426 for (
unsigned j = 0;
j < NumElts;
j += 2) {
7427 if ((
M[
i+
j] >= 0 && (
unsigned)
M[
i+
j] != Idx) ||
7428 (
M[
i+
j+1] >= 0 && (
unsigned)
M[
i+
j+1] != Idx + NumElts))
7434 if (
M.size() == NumElts*2)
7453 if (
M.size() != NumElts &&
M.size() != NumElts*2)
7456 for (
unsigned i = 0;
i <
M.size();
i += NumElts) {
7458 unsigned Idx = WhichResult * NumElts / 2;
7459 for (
unsigned j = 0;
j < NumElts;
j += 2) {
7460 if ((
M[
i+
j] >= 0 && (
unsigned)
M[
i+
j] != Idx) ||
7461 (
M[
i+
j+1] >= 0 && (
unsigned)
M[
i+
j+1] != Idx))
7467 if (
M.size() == NumElts*2)
7480 unsigned &WhichResult,
7483 if (
isVTRNMask(ShuffleMask, VT, WhichResult))
7485 if (
isVUZPMask(ShuffleMask, VT, WhichResult))
7487 if (
isVZIPMask(ShuffleMask, VT, WhichResult))
7505 if (NumElts !=
M.size())
7509 for (
unsigned i = 0;
i != NumElts; ++
i)
7510 if (
M[
i] >= 0 &&
M[
i] != (
int) (NumElts - 1 -
i))
7527 int Ofs = Top ? 1 : 0;
7528 int Upper = SingleSource ? 0 : NumElts;
7529 for (
int i = 0,
e = NumElts / 2;
i !=
e; ++
i) {
7530 if (
M[
i] >= 0 &&
M[
i] != (
i * 2) + Ofs)
7532 if (
M[
i +
e] >= 0 &&
M[
i +
e] != (
i * 2) + Ofs +
Upper)
7550 unsigned Offset = Top ? 0 : 1;
7551 unsigned N = SingleSource ? 0 : NumElts;
7552 for (
unsigned i = 0;
i < NumElts;
i += 2) {
7553 if (
M[
i] >= 0 &&
M[
i] != (
int)
i)
7555 if (
M[
i + 1] >= 0 &&
M[
i + 1] != (
int)(
N +
i +
Offset))
7564 if (NumElts !=
M.size())
7572 unsigned Off0 = rev ? NumElts / 2 : 0;
7573 unsigned Off1 = rev ? 0 : NumElts / 2;
7574 for (
unsigned i = 0;
i < NumElts;
i += 2) {
7575 if (
M[
i] >= 0 &&
M[
i] != (
int)(Off0 +
i / 2))
7577 if (
M[
i + 1] >= 0 &&
M[
i + 1] != (
int)(Off1 +
i / 2))
7593 if (!
ST->hasMVEFloatOps())
7619 for (
unsigned i = 1;
i < 4;
i++) {
7646 if (!
ST->hasMVEFloatOps())
7667 for (
unsigned i = 1;
i < 4;
i++) {
7688 if (!isa<ConstantSDNode>(
N))
7690 Val = cast<ConstantSDNode>(
N)->getZExtValue();
7692 if (
ST->isThumb1Only()) {
7693 if (Val <= 255 || ~Val <= 255)
7705 EVT VT =
Op.getValueType();
7707 assert(
ST->hasMVEIntegerOps() &&
"LowerBUILD_VECTOR_i1 called without MVE!");
7711 unsigned BitsPerBool;
7715 }
else if (NumElts == 4) {
7718 }
else if (NumElts == 8) {
7721 }
else if (NumElts == 16) {
7730 if (!isa<ConstantSDNode>(FirstOp) &&
7732 return U.get().isUndef() || U.get() == FirstOp;
7740 unsigned Bits32 = 0;
7741 for (
unsigned i = 0;
i < NumElts; ++
i) {
7743 if (!isa<ConstantSDNode>(V) && !V.
isUndef())
7745 bool BitSet = V.
isUndef() ?
false : cast<ConstantSDNode>(V)->getZExtValue();
7747 Bits32 |= BoolMask << (
i * BitsPerBool);
7753 for (
unsigned i = 0;
i < NumElts; ++
i) {
7755 if (isa<ConstantSDNode>(V) || V.
isUndef())
7766 if (!
ST->hasMVEIntegerOps())
7770 EVT VT =
Op.getValueType();
7780 if (
N != 1 &&
N != 2 &&
N != 4 &&
N != 8)
7784 for (
unsigned I = 2;
I < NumElts;
I++) {
7800 switch (
N->getOpcode()) {
7809 return N->getOperand(1).getNode() ==
Op;
7811 switch (
N->getConstantOperandVal(0)) {
7812 case Intrinsic::arm_mve_add_predicated:
7813 case Intrinsic::arm_mve_mul_predicated:
7814 case Intrinsic::arm_mve_qadd_predicated:
7815 case Intrinsic::arm_mve_vhadd:
7816 case Intrinsic::arm_mve_hadd_predicated:
7817 case Intrinsic::arm_mve_vqdmulh:
7818 case Intrinsic::arm_mve_qdmulh_predicated:
7819 case Intrinsic::arm_mve_vqrdmulh:
7820 case Intrinsic::arm_mve_qrdmulh_predicated:
7821 case Intrinsic::arm_mve_vqdmull:
7822 case Intrinsic::arm_mve_vqdmull_predicated:
7824 case Intrinsic::arm_mve_sub_predicated:
7825 case Intrinsic::arm_mve_qsub_predicated:
7826 case Intrinsic::arm_mve_vhsub:
7827 case Intrinsic::arm_mve_hsub_predicated:
7828 return N->getOperand(2).getNode() ==
Op;
7843 EVT VT =
Op.getValueType();
7851 APInt SplatBits, SplatUndef;
7852 unsigned SplatBitSize;
7854 if (BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
7861 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32) &&
7863 [BVN](
const SDNode *U) { return IsQRMVEInstruction(U, BVN); })) {
7872 if ((
ST->hasNEON() && SplatBitSize <= 64) ||
7873 (
ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
7878 SplatBitSize, DAG, dl, VmovVT, VT,
VMOVModImm);
7886 uint64_t NegatedImm = (~SplatBits).getZExtValue();
7888 NegatedImm, SplatUndef.
getZExtValue(), SplatBitSize, DAG, dl, VmovVT,
7906 if (
ST->hasMVEIntegerOps() &&
7907 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32)) {
7926 bool isOnlyLowElement =
true;
7927 bool usesOnlyOneValue =
true;
7928 bool hasDominantValue =
false;
7935 for (
unsigned i = 0;
i < NumElts; ++
i) {
7940 isOnlyLowElement =
false;
7941 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
7944 ValueCounts.
insert(std::make_pair(V, 0));
7945 unsigned &Count = ValueCounts[V];
7948 if (++Count > (NumElts / 2)) {
7949 hasDominantValue =
true;
7953 if (ValueCounts.
size() != 1)
7954 usesOnlyOneValue =
false;
7955 if (!
Value.getNode() && !ValueCounts.
empty())
7958 if (ValueCounts.
empty())
7970 if (hasDominantValue && EltSize <= 32) {
7981 (constIndex = dyn_cast<ConstantSDNode>(
Value->getOperand(1)))) {
7986 if (VT !=
Value->getOperand(0).getValueType()) {
7999 if (!usesOnlyOneValue) {
8002 for (
unsigned I = 0;
I < NumElts; ++
I) {
8007 Ops.push_back(
Op.getOperand(
I));
8019 for (
unsigned i = 0;
i < NumElts; ++
i)
8024 Val = LowerBUILD_VECTOR(Val, DAG,
ST);
8028 if (usesOnlyOneValue) {
8063 Lower = LowerBUILD_VECTOR(Lower, DAG,
ST);
8067 Upper = LowerBUILD_VECTOR(Upper, DAG,
ST);
8075 if (EltSize >= 32) {
8081 for (
unsigned i = 0;
i < NumElts; ++
i)
8095 for (
unsigned i = 0 ;
i < NumElts; ++
i) {
8114 EVT VT =
Op.getValueType();
8117 struct ShuffleSourceInfo {
8120 unsigned MaxElt = 0;
8130 int WindowScale = 1;
8132 ShuffleSourceInfo(
SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
8140 for (
unsigned i = 0;
i < NumElts; ++
i) {
8148 }
else if (!isa<ConstantSDNode>(V.
getOperand(1))) {
8157 if (
Source == Sources.end())
8158 Source = Sources.
insert(Sources.end(), ShuffleSourceInfo(SourceVec));
8161 unsigned EltNo = cast<ConstantSDNode>(V.
getOperand(1))->getZExtValue();
8168 if (Sources.size() > 2)
8174 for (
auto &
Source : Sources) {
8175 EVT SrcEltTy =
Source.Vec.getValueType().getVectorElementType();
8176 if (SrcEltTy.
bitsLT(SmallestEltTy))
8177 SmallestEltTy = SrcEltTy;
8179 unsigned ResMultiplier =
8187 for (
auto &Src : Sources) {
8188 EVT SrcVT = Src.ShuffleVec.getValueType();
8192 if (SrcVTSize == VTSize)
8201 if (SrcVTSize < VTSize) {
8202 if (2 * SrcVTSize != VTSize)
8208 DAG.
getUNDEF(Src.ShuffleVec.getValueType()));
8212 if (SrcVTSize != 2 * VTSize)
8215 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
8220 if (Src.MinElt >= NumSrcElts) {
8225 Src.WindowBase = -NumSrcElts;
8226 }
else if (Src.MaxElt < NumSrcElts) {
8243 Src.WindowBase = -Src.MinElt;
8250 for (
auto &Src : Sources) {
8251 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
8252 if (SrcEltTy == SmallestEltTy)
8257 Src.WindowBase *= Src.WindowScale;
8263 assert(Src.ShuffleVec.getValueType() == ShuffleVT););
8270 if (Entry.isUndef())
8273 auto Src =
llvm::find(Sources, Entry.getOperand(0));
8274 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
8279 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
8282 int LanesDefined = BitsDefined / BitsPerShuffleLane;
8286 int *LaneMask = &
Mask[
i * ResMultiplier];
8288 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
8289 ExtractBase += NumElts * (Src - Sources.begin());
8290 for (
int j = 0;
j < LanesDefined; ++
j)
8291 LaneMask[
j] = ExtractBase +
j;
8297 assert(Sources.size() <= 2 &&
"Too many sources!");
8300 for (
unsigned i = 0;
i < Sources.size(); ++
i)
8329 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8349 unsigned PFIndexes[4];
8350 for (
unsigned i = 0;
i != 4; ++
i) {
8354 PFIndexes[
i] =
M[
i];
8358 unsigned PFTableIndex =
8359 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8361 unsigned Cost = (PFEntry >> 30);
8367 bool ReverseVEXT, isV_UNDEF;
8368 unsigned Imm, WhichResult;
8371 if (EltSize >= 32 ||
8378 else if (Subtarget->hasNEON() &&
8386 else if (Subtarget->hasMVEIntegerOps() &&
8390 else if (Subtarget->hasMVEIntegerOps() &&
8404 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8405 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8406 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8409 if (LHSID == (1*9+2)*9+3)
return LHS;
8410 assert(LHSID == ((4*9+5)*9+6)*9+7 &&
"Illegal OP_COPY!");
8467 for (
int I : ShuffleMask)
8470 if (
V2.getNode()->isUndef())
8480 EVT VT =
Op.getValueType();
8483 "Expect an v8i16/v16i8 type");
8489 std::vector<int> NewMask;
8493 NewMask.push_back(
i);
8549 EVT VT =
Op.getValueType();
8554 "No support for vector shuffle of boolean predicates");
8580 "Expected identical vector type in expanded i1 shuffle!");
8584 PredAsVector2, ShuffleMask);
8606 EVT VT =
Op.getValueType();
8611 "Unexpected vector type");
8613 int QuarterSize = NumElts / 4;
8623 if (ShuffleMask[Start +
i] >= 0) {
8624 if (ShuffleMask[Start +
i] %
Length !=
i)
8626 MovIdx = ShuffleMask[Start +
i] /
Length;
8635 if (ShuffleMask[Start +
i] >= 0 &&
8636 (ShuffleMask[Start +
i] /
Length != MovIdx ||
8637 ShuffleMask[Start +
i] %
Length !=
i))
8643 for (
int Part = 0; Part < 4; ++Part) {
8645 int Elt = getMovIdx(ShuffleMask, Part * QuarterSize, QuarterSize);
8649 Input =
Op->getOperand(1);
8659 if (!Parts[0] && !Parts[1] && !Parts[2] && !Parts[3])
8664 if (!Parts[0] || !Parts[1] || !Parts[2] || !Parts[3]) {
8666 for (
int Part = 0; Part < 4; ++Part)
8667 for (
int i = 0;
i < QuarterSize;
i++)
8668 NewShuffleMask.push_back(
8669 Parts[Part] ? -1 : ShuffleMask[Part * QuarterSize +
i]);
8671 VT, dl,
Op->getOperand(0),
Op->getOperand(1), NewShuffleMask);
8674 for (
int Part = 0; Part < 4; ++Part)
8690 EVT VT =
Op.getValueType();
8702 for (
int i = 0, NumMaskElts =
Mask.size();
i < NumMaskElts; ++
i) {
8706 if (
Mask[
i] !=
i + BaseOffset) {
8707 if (OffElement == -1)
8713 return NonUndef > 2 && OffElement != -1;
8717 if (isOneOffIdentityMask(ShuffleMask, VT, 0, OffElement))
8719 else if (isOneOffIdentityMask(ShuffleMask, VT, NumElts, OffElement))
8727 : VT.getScalarType();
8730 ShuffleMask[OffElement] < (
int)NumElts ? V1 :
V2,
8741 EVT VT =
Op.getValueType();
8745 if (
ST->hasMVEIntegerOps() && EltSize == 1)
8756 if (EltSize <= 32) {
8760 if (Lane == -1) Lane = 0;
8771 bool IsScalarToVector =
true;
8774 IsScalarToVector =
false;
8777 if (IsScalarToVector)
8784 bool ReverseVEXT =
false;
8810 unsigned WhichResult = 0;
8811 bool isV_UNDEF =
false;
8812 if (
ST->hasNEON()) {
8814 ShuffleMask, VT, WhichResult, isV_UNDEF)) {
8821 if (
ST->hasMVEIntegerOps()) {
8856 }) &&
"Unexpected shuffle index into UNDEF operand!");
8859 ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
8862 assert((WhichResult == 0) &&
8863 "In-place shuffle of concat can only have one result!");
8872 if (
ST->hasMVEIntegerOps() && EltSize <= 32) {
8876 for (
bool Top : {
false,
true}) {
8877 for (
bool SingleSource : {
false,
true}) {
8878 if (
isTruncMask(ShuffleMask, VT, Top, SingleSource)) {
8883 SingleSource ? V1 :
V2);
8899 unsigned PFIndexes[4];
8900 for (
unsigned i = 0;
i != 4; ++
i) {
8901 if (ShuffleMask[
i] < 0)
8904 PFIndexes[
i] = ShuffleMask[
i];
8908 unsigned PFTableIndex =
8909 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8911 unsigned Cost = (PFEntry >> 30);
8917 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8918 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8928 if (EltSize >= 32) {
8936 for (
unsigned i = 0;
i < NumElts; ++
i) {
8937 if (ShuffleMask[
i] < 0)
8938 Ops.push_back(DAG.
getUNDEF(EltVT));
8941 ShuffleMask[
i] < (
int)NumElts ? V1 :
V2,
8957 if (
ST->hasMVEIntegerOps())
8966 EVT VecVT =
Op.getOperand(0).getValueType();
8970 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
8974 unsigned Lane = cast<ConstantSDNode>(
Op.getOperand(2))->getZExtValue();
8975 unsigned LaneWidth =
8977 unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth;
8989 if (!isa<ConstantSDNode>(Lane))
8995 if (Subtarget->hasMVEIntegerOps() &&
8996 Op.getValueType().getScalarSizeInBits() == 1)
9020 IVecIn, IElt, Lane);
9029 EVT VecVT =
Op.getOperand(0).getValueType();
9033 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
9037 unsigned Lane = cast<ConstantSDNode>(
Op.getOperand(1))->getZExtValue();
9038 unsigned LaneWidth =
9049 if (!isa<ConstantSDNode>(Lane))
9069 assert(
Op.getValueType().getScalarSizeInBits() == 1 &&
9070 "Unexpected custom CONCAT_VECTORS lowering");
9072 "Unexpected custom CONCAT_VECTORS lowering");
9074 "CONCAT_VECTORS lowering only supported for MVE");
9078 EVT Op2VT =
V2.getValueType();
9079 assert(Op1VT == Op2VT &&
"Operand types don't match!");
9098 auto ExtractInto = [&DAG, &dl](
SDValue NewV,
SDValue ConVec,
unsigned &
j) {
9099 EVT NewVT = NewV.getValueType();
9100 EVT ConcatVT = ConVec.getValueType();
9110 ConVec = ExtractInto(NewV1, ConVec,
j);
9111 ConVec = ExtractInto(NewV2, ConVec,
j);
9128 while (ConcatOps.size() > 1) {
9129 for (
unsigned I = 0,
E = ConcatOps.size();
I !=
E;
I += 2) {
9132 ConcatOps[
I / 2] = ConcatPair(V1,
V2);
9134 ConcatOps.
resize(ConcatOps.size() / 2);
9136 return ConcatOps[0];
9141 EVT VT =
Op->getValueType(0);
9147 assert(
Op.getValueType().is128BitVector() &&
Op.getNumOperands() == 2 &&
9148 "unexpected CONCAT_VECTORS");
9169 EVT VT =
Op.getValueType();
9172 unsigned Index = cast<ConstantSDNode>(
V2)->getZExtValue();
9175 "Unexpected custom EXTRACT_SUBVECTOR lowering");
9177 "EXTRACT_SUBVECTOR lowering only supported for MVE");
9220 assert(
ST->hasMVEIntegerOps() &&
"Expected MVE!");
9221 EVT VT =
N->getValueType(0);
9223 "Expected a vector i1 type!");
9225 EVT FromVT =
Op.getValueType();
9236 if (!Subtarget->hasMVEIntegerOps())
9239 EVT ToVT =
N->getValueType(0);
9284 EVT FromVT =
N->getOperand(0).getValueType();
9296 if (!Subtarget->hasMVEIntegerOps())
9301 EVT ToVT =
N->getValueType(0);
9305 EVT FromVT =
Op.getValueType();
9333 EVT VT =
N->getValueType(0);
9335 SDNode *BVN =
N->getOperand(0).getNode();
9340 unsigned HiElt = 1 - LoElt;
9345 if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
9361 for (
unsigned i = 0,
e =
N->getNumOperands();
i !=
e; ++
i) {
9362 SDNode *Elt =
N->getOperand(
i).getNode();
9365 unsigned HalfSize = EltSize / 2;
9367 if (!
isIntN(HalfSize,
C->getSExtValue()))
9370 if (!
isUIntN(HalfSize,
C->getZExtValue()))
9409 switch (OrigSimpleTy) {
9425 unsigned ExtOpcode) {
9448 if (ExtendedTy ==
LD->getMemoryVT())
9450 LD->getBasePtr(),
LD->getPointerInfo(),
LD->getAlign(),
9451 LD->getMemOperand()->getFlags());
9457 LD->getChain(),
LD->getBasePtr(),
LD->getPointerInfo(),
9458 LD->getMemoryVT(),
LD->getAlign(),
9459 LD->getMemOperand()->getFlags());
9472 N->getOperand(0)->getValueType(0),
9478 "Expected extending load");
9484 DAG.
getNode(Opcode,
SDLoc(newLoad),
LD->getValueType(0), newLoad);
9493 SDNode *BVN =
N->getOperand(0).getNode();
9503 EVT VT =
N->getValueType(0);
9509 for (
unsigned i = 0;
i != NumElts; ++
i) {
9511 const APInt &CInt =
C->getAPIntValue();
9520 unsigned Opcode =
N->getOpcode();
9522 SDNode *N0 =
N->getOperand(0).getNode();
9523 SDNode *N1 =
N->getOperand(1).getNode();
9531 unsigned Opcode =
N->getOpcode();
9533 SDNode *N0 =
N->getOperand(0).getNode();
9534 SDNode *N1 =
N->getOperand(1).getNode();
9544 EVT VT =
Op.getValueType();
9546 "unexpected type for custom-lowering ISD::MUL");
9547 SDNode *N0 =
Op.getOperand(0).getNode();
9548 SDNode *N1 =
Op.getOperand(1).getNode();
9549 unsigned NewOpc = 0;
9553 if (isN0SExt && isN1SExt)
9558 if (isN0ZExt && isN1ZExt)
9560 else if (isN1SExt || isN1ZExt) {
9594 "unexpected types for extended operands to VMULL");
9595 return DAG.
getNode(NewOpc,
DL, VT, Op0, Op1);
9688 EVT VT =
Op.getValueType();
9690 "unexpected type for custom-lowering ISD::SDIV");
9725 EVT VT =
Op.getValueType();
9727 "unexpected type for custom-lowering ISD::UDIV");
9801 EVT VT =
N->getValueType(0);
9815 Op.getOperand(1), Carry);
9829 Op.getOperand(1), Carry);
9850 EVT ArgVT =
Arg.getValueType();
9862 bool ShouldUseSRet = Subtarget->
isAPCS_ABI();
9864 if (ShouldUseSRet) {
9866 const uint64_t ByteSize =
DL.getTypeAllocSize(RetTy);
9874 Entry.IsSExt =
false;
9875 Entry.IsZExt =
false;
9876 Entry.IsSRet =
true;
9877 Args.push_back(Entry);
9884 Entry.IsSExt =
false;
9885 Entry.IsZExt =
false;
9886 Args.push_back(Entry);
9889 (ArgVT ==
MVT::f64) ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
9899 std::pair<SDValue, SDValue> CallResult =
LowerCallTo(CLI);
9902 return CallResult.first;
9921 EVT VT =
Op.getValueType();
9923 "unexpected type for custom lowering DIV");
9929 const char *
Name =
nullptr;
9939 for (
auto AI : {1, 0}) {
9941 Arg.Node =
Op.getOperand(AI);
9946 CallLoweringInfo CLI(DAG);
9960 ARMTargetLowering::BuildSDIVPow2(
SDNode *
N,
const APInt &Divisor,
9968 const bool MinSize =
ST.hasMinSize();
9969 const bool HasDivide =
ST.isThumb() ?
ST.hasDivideInThumbMode()
9970 :
ST.hasDivideInARMMode();
9974 if (
N->getOperand(0).getValueType().isVector())
9979 if (!(MinSize && HasDivide))
9992 if (Divisor.
sgt(128))
10001 "unexpected type for custom lowering DIV");
10007 return LowerWindowsDIVLibCall(
Op, DAG,
Signed, DBZCHK);
10023 void ARMTargetLowering::ExpandDIV_Windows(
10030 "unexpected type for custom lowering DIV");
10047 EVT MemVT =
LD->getMemoryVT();
10050 "Expected a predicate type!");
10051 assert(MemVT ==
Op.getValueType());
10053 "Expected a non-extending load");
10054 assert(
LD->isUnindexed() &&
"Expected a unindexed load");
10070 LD->getMemOperand());
10086 EVT MemVT =
LD->getMemoryVT();
10087 assert(
LD->isUnindexed() &&
"Loads should be unindexed at this point.");
10089 if (MemVT ==
MVT::i64 && Subtarget->hasV5TEOps() &&
10094 {LD->getChain(), LD->getBasePtr()}, MemVT,
LD->getMemOperand());
10104 EVT MemVT =
ST->getMemoryVT();
10107 "Expected a predicate type!");
10108 assert(MemVT ==
ST->getValue().getValueType());
10109 assert(!
ST->isTruncatingStore() &&
"Expected a non-extending store");
10110 assert(
ST->isUnindexed() &&
"Expected a unindexed store");
10135 ST->getChain(), dl, GRP,
ST->getBasePtr(),
10137 ST->getMemOperand());
10143 EVT MemVT =
ST->getMemoryVT();
10144 assert(
ST->isUnindexed() &&
"Stores should be unindexed at this point.");
10146 if (MemVT ==
MVT::i64 && Subtarget->hasV5TEOps() &&
10161 {ST->getChain(), Lo, Hi, ST->getBasePtr()},
10162 MemVT,
ST->getMemOperand());
10163 }
else if (Subtarget->hasMVEIntegerOps() &&
10180 MVT VT =
Op.getSimpleValueType();
10182 SDValue PassThru =
N->getPassThru();
10193 VT, dl,
N->getChain(),
N->getBasePtr(),
N->getOffset(),
Mask, ZeroVec,
10194 N->getMemoryVT(),
N->getMemOperand(),
N->getAddressingMode(),
10195 N->getExtensionType(),
N->isExpandingLoad());
10200 if (!PassThru.
isUndef() && !PassThruIsCastZero)
10207 if (!
ST->hasMVEIntegerOps())
10211 unsigned BaseOpcode = 0;
10212 switch (
Op->getOpcode()) {
10228 unsigned NumActiveLanes = NumElts;
10230 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10231 NumActiveLanes == 2) &&
10232 "Only expected a power 2 vector size");
10236 while (NumActiveLanes > 4) {
10239 Op0 = DAG.
getNode(BaseOpcode, dl, VT, Op0, Rev);
10240 NumActiveLanes /= 2;
10244 if (NumActiveLanes == 4) {
10254 SDValue Res0 = DAG.
getNode(BaseOpcode, dl, EltVT, Ext0, Ext1,
Op->getFlags());
10255 SDValue Res1 = DAG.
getNode(BaseOpcode, dl, EltVT, Ext2, Ext3,
Op->getFlags());
10256 Res = DAG.
getNode(BaseOpcode, dl, EltVT, Res0, Res1,
Op->getFlags());
10262 Res = DAG.
getNode(BaseOpcode, dl, EltVT, Ext0, Ext1,
Op->getFlags());
10266 if (EltVT !=
Op->getValueType(0))
10273 if (!
ST->hasMVEFloatOps())
10295 SDValue Ops[] = {
N->getOperand(0),
10324 const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
10333 "AtomicCmpSwap on types less than 64 should be legal");
10334 SDValue Ops[] = {
N->getOperand(1),
10339 ARM::CMP_SWAP_64,
SDLoc(
N),
10359 EVT VT =
Op.getValueType();
10368 if (isUnsupportedFloatingType(
LHS.getValueType())) {
10371 if (!
RHS.getNode()) {
10393 SDValue Result = getCMOV(dl, VT, False, True, ARMcc, CCR, Cmp, DAG);
10396 Cmp = getVFPCmp(
LHS,
RHS, DAG, dl, IsSignaling);
10397 Result = getCMOV(dl, VT, Result, True, ARMcc, CCR, Cmp, DAG);
10413 switch (
Op.getOpcode()) {
10445 case ISD::BITCAST:
return ExpandBITCAST(
Op.getNode(), DAG, Subtarget);
10449 case ISD::SREM:
return LowerREM(
Op.getNode(), DAG);
10450 case ISD::UREM:
return LowerREM(
Op.getNode(), DAG);
10474 return LowerDIV_Windows(
Op, DAG,
true);
10478 return LowerDIV_Windows(
Op, DAG,
false);
10484 return LowerSignedALUO(
Op, DAG);
10487 return LowerUnsignedALUO(
Op, DAG);
10516 return LowerDYNAMIC_STACKALLOC(
Op, DAG);
10525 return LowerSPONENTRY(
Op, DAG);
10532 unsigned IntNo = cast<ConstantSDNode>(
N->getOperand(0))->getZExtValue();
10534 if (IntNo == Intrinsic::arm_smlald)
10536 else if (IntNo == Intrinsic::arm_smlaldx)
10538 else if (IntNo == Intrinsic::arm_smlsld)
10540 else if (IntNo == Intrinsic::arm_smlsldx)
10555 N->getOperand(1),
N->getOperand(2),
10567 switch (
N->getOpcode()) {
10574 Res = ExpandBITCAST(
N, DAG, Subtarget);
10583 Res = LowerREM(
N, DAG);
10587 Res = LowerDivRem(
SDValue(
N, 0), DAG);
10636 void ARMTargetLowering::SetupEntryBlockForSjLj(
MachineInstr &
MI,
10641 "ROPI/RWPI not currently supported with SjLj");
10650 bool isThumb = Subtarget->isThumb();
10651 bool isThumb2 = Subtarget->
isThumb2();
10654 unsigned PCAdj = (
isThumb || isThumb2) ? 4 : 8;
10660 : &ARM::GPRRegClass;
10774 : &ARM::GPRnopcRegClass;
10779 unsigned MaxCSNum = 0;
10787 if (!II.isEHLabel())
10790 MCSymbol *Sym = II.getOperand(0).getMCSymbol();
10791 if (!MF->hasCallSiteLandingPad(Sym))
continue;
10794 for (
unsigned Idx : CallSiteIdxs) {
10795 CallSiteNumToLPad[Idx].push_back(&
BB);
10796 MaxCSNum =
std::max(MaxCSNum, Idx);
10803 std::vector<MachineBasicBlock*> LPadList;
10805 LPadList.reserve(CallSiteNumToLPad.
size());
10806 for (
unsigned I = 1;
I <= MaxCSNum; ++
I) {
10809 LPadList.push_back(
MBB);
10814 assert(!LPadList.empty() &&
10815 "No landing pad destinations for the dispatch jump table!");
10829 unsigned trap_opcode;
10830 if (Subtarget->isThumb())
10831 trap_opcode = ARM::tTRAP;
10833 trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl :
ARM::TRAP;
10842 MF->insert(MF->end(), DispatchBB);
10843 MF->insert(MF->end(), DispContBB);
10844 MF->insert(MF->end(), TrapBB);
10848 SetupEntryBlockForSjLj(
MI,
MBB, DispatchBB, FI);
10855 MIB =
BuildMI(DispatchBB, dl,
TII->get(ARM::Int_eh_sjlj_dispatchsetup));
10867 unsigned NumLPads = LPadList.size();
10870 BuildMI(DispatchBB, dl,
TII->get(ARM::t2LDRi12), NewVReg1)
10876 if (NumLPads < 256) {
10877 BuildMI(DispatchBB, dl,
TII->get(ARM::t2CMPri))
10879 .
addImm(LPadList.size())
10883 BuildMI(DispatchBB, dl,
TII->get(ARM::t2MOVi16), VReg1)
10884 .
addImm(NumLPads & 0xFFFF)
10887 unsigned VReg2 = VReg1;
10888 if ((NumLPads & 0xFFFF0000) != 0) {
10890 BuildMI(DispatchBB, dl,
TII->get(ARM::t2MOVTi16), VReg2)
10896 BuildMI(DispatchBB, dl,
TII->get(ARM::t2CMPrr))
10902 BuildMI(DispatchBB, dl,
TII->get(ARM::t2Bcc))
10908 BuildMI(DispContBB, dl,
TII->get(ARM::t2LEApcrelJT), NewVReg3)
10913 BuildMI(DispContBB, dl,
TII->get(ARM::t2ADDrs), NewVReg4)
10920 BuildMI(DispContBB, dl,
TII->get(ARM::t2BR_JT))
10924 }
else if (Subtarget->isThumb()) {
10926 BuildMI(DispatchBB, dl,
TII->get(ARM::tLDRspi), NewVReg1)
10932 if (NumLPads < 256) {
10933 BuildMI(DispatchBB, dl,
TII->get(ARM::tCMPi8))
10943 Align Alignment = MF->getDataLayout().getPrefTypeAlign(
Int32Ty);
10944 unsigned Idx =
ConstantPool->getConstantPoolIndex(
C, Alignment);
10947 BuildMI(DispatchBB, dl,
TII->get(ARM::tLDRpci))
10951 BuildMI(DispatchBB, dl,
TII->get(ARM::tCMPr))
10957 BuildMI(DispatchBB, dl,
TII->get(ARM::tBcc))
10963 BuildMI(DispContBB, dl,
TII->get(ARM::tLSLri), NewVReg2)
10970 BuildMI(DispContBB, dl,
TII->get(ARM::tLEApcrelJT), NewVReg3)
10975 BuildMI(DispContBB, dl,
TII->get(ARM::tADDrr), NewVReg4)
10986 BuildMI(DispContBB, dl,
TII->get(ARM::tLDRi), NewVReg5)
10992 unsigned NewVReg6 = NewVReg5;
10993 if (IsPositionIndependent) {
10995 BuildMI(DispContBB, dl,
TII->get(ARM::tADDrr), NewVReg6)
11002 BuildMI(DispContBB, dl,
TII->get(ARM::tBR_JTr))
11007 BuildMI(DispatchBB, dl,
TII->get(ARM::LDRi12), NewVReg1)
11013 if (NumLPads < 256) {
11014 BuildMI(DispatchBB, dl,
TII->get(ARM::CMPri))
11018 }
else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
11020 BuildMI(DispatchBB, dl,
TII->get(ARM::MOVi16), VReg1)
11021 .
addImm(NumLPads & 0xFFFF)
11024 unsigned VReg2 = VReg1;
11025 if ((NumLPads & 0xFFFF0000) != 0) {
11027 BuildMI(DispatchBB, dl,
TII->get(ARM::MOVTi16), VReg2)
11033 BuildMI(DispatchBB, dl,
TII->get(ARM::CMPrr))
11043 Align Alignment = MF->getDataLayout().getPrefTypeAlign(
Int32Ty);
11044 unsigned Idx =
ConstantPool->getConstantPoolIndex(
C, Alignment);
11047 BuildMI(DispatchBB, dl,
TII->get(ARM::LDRcp))
11052 BuildMI(DispatchBB, dl,
TII->get(ARM::CMPrr))
11064 BuildMI(DispContBB, dl,
TII->get(ARM::MOVsi), NewVReg3)
11070 BuildMI(DispContBB, dl,
TII->get(ARM::LEApcrelJT), NewVReg4)
11078 BuildMI(DispContBB, dl,
TII->get(ARM::LDRrs), NewVReg5)
11085 if (IsPositionIndependent) {
11086 BuildMI(DispContBB, dl,
TII->get(ARM::BR_JTadd))
11091 BuildMI(DispContBB, dl,
TII->get(ARM::BR_JTr))
11100 if (SeenMBBs.
insert(CurMBB).second)
11112 while (!Successors.empty()) {
11115 BB->removeSuccessor(SMBB);
11116 MBBLPads.push_back(SMBB);
11121 BB->normalizeSuccProbs();
11128 II =
BB->rbegin(),
IE =
BB->rend(); II !=
IE; ++II) {
11129 if (!II->isCall())
continue;
11133 OI = II->operands_begin(), OE = II->operands_end();
11135 if (!OI->isReg())
continue;
11136 DefRegs[OI->getReg()] =
true;
11141 for (
unsigned i = 0; SavedRegs[
i] != 0; ++
i) {
11142 unsigned Reg = SavedRegs[
i];
11144 !ARM::tGPRRegClass.contains(
Reg) &&
11145 !ARM::hGPRRegClass.contains(
Reg))
11149 if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(
Reg))
11162 MBBLPad->setIsEHPad(
false);
11165 MI.eraseFromParent();
11178 static unsigned getLdOpcode(
unsigned LdSize,
bool IsThumb1,
bool IsThumb2) {
11180 return LdSize == 16 ? ARM::VLD1q32wb_fixed
11181 : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
11183 return LdSize == 4 ? ARM::tLDRi
11184 : LdSize == 2 ? ARM::tLDRHi
11185 : LdSize == 1 ? ARM::tLDRBi : 0;
11187 return LdSize == 4 ? ARM::t2LDR_POST
11188 : LdSize == 2 ? ARM::t2LDRH_POST
11189 : LdSize == 1 ? ARM::t2LDRB_POST : 0;
11190 return LdSize == 4 ? ARM::LDR_POST_IMM
11191 : LdSize == 2 ? ARM::LDRH_POST
11192 : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
11197 static unsigned getStOpcode(
unsigned StSize,
bool IsThumb1,
bool IsThumb2) {
11199 return StSize == 16 ? ARM::VST1q32wb_fixed
11200 : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
11202 return StSize == 4 ? ARM::tSTRi
11203 : StSize == 2 ? ARM::tSTRHi
11204 : StSize == 1 ? ARM::tSTRBi : 0;
11206 return StSize == 4 ? ARM::t2STR_POST
11207 : StSize == 2 ? ARM::t2STRH_POST
11208 : StSize == 1 ? ARM::t2STRB_POST : 0;
11209 return StSize == 4 ? ARM::STR_POST_IMM
11210 : StSize == 2 ? ARM::STRH_POST
11211 : StSize == 1 ? ARM::STRB_POST_IMM : 0;
11218 unsigned LdSize,
unsigned Data,
unsigned AddrIn,
11219 unsigned AddrOut,
bool IsThumb1,
bool IsThumb2) {
11220 unsigned LdOpc =
getLdOpcode(LdSize, IsThumb1, IsThumb2);
11221 assert(LdOpc != 0 &&
"Should have a load opcode");
11228 }
else if (IsThumb1) {
11239 }
else if (IsThumb2) {
11259 unsigned StSize,
unsigned Data,
unsigned AddrIn,
11260 unsigned AddrOut,
bool IsThumb1,
bool IsThumb2) {
11261 unsigned StOpc =
getStOpcode(StSize, IsThumb1, IsThumb2);
11262 assert(StOpc != 0 &&
"Should have a store opcode");
11269 }
else if (IsThumb1) {
11281 }
else if (IsThumb2) {
11309 unsigned SizeVal =
MI.getOperand(2).getImm();
11310 unsigned Alignment =
MI.getOperand(3).getImm();
11315 unsigned UnitSize = 0;
11320 bool IsThumb2 = Subtarget->
isThumb2();
11321 bool IsThumb = Subtarget->isThumb();
11323 if (Alignment & 1) {
11325 }
else if (Alignment & 2) {
11330 Subtarget->hasNEON()) {
11331 if ((Alignment % 16 == 0) && SizeVal >= 16)
11333 else if ((Alignment % 8 == 0) && SizeVal >= 8)
11342 bool IsNeon = UnitSize >= 8;
11343 TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
11345 VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
11346 : UnitSize == 8 ? &ARM::DPRRegClass
11349 unsigned BytesLeft = SizeVal % UnitSize;
11350 unsigned LoopSize = SizeVal - BytesLeft;
11352 if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
11356 unsigned srcIn = src;
11357 unsigned destIn = dest;
11358 for (
unsigned i = 0;
i < LoopSize;
i+=UnitSize) {
11363 IsThumb1, IsThumb2);
11365 IsThumb1, IsThumb2);
11373 for (
unsigned i = 0;
i < BytesLeft;
i++) {
11378 IsThumb1, IsThumb2);
11380 IsThumb1, IsThumb2);
11384 MI.eraseFromParent();
11410 MF->
insert(It, loopMBB);
11411 MF->
insert(It, exitMBB);
11421 unsigned Vtmp = varEnd;
11422 if ((LoopSize & 0xFFFF0000) != 0)
11424 BuildMI(
BB, dl,
TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16), Vtmp)
11425 .
addImm(LoopSize & 0xFFFF)
11428 if ((LoopSize & 0xFFFF0000) != 0)
11429 BuildMI(
BB, dl,
TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16), varEnd)
11440 unsigned Idx =
ConstantPool->getConstantPoolIndex(
C, Alignment);
11459 BB->addSuccessor(loopMBB);
11488 IsThumb1, IsThumb2);
11490 IsThumb1, IsThumb2);
11502 TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
11511 TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
11515 BB->addSuccessor(loopMBB);
11516 BB->addSuccessor(exitMBB);
11520 auto StartOfExit = exitMBB->
begin();
11524 unsigned srcIn = srcLoop;
11525 unsigned destIn = destLoop;
11526 for (
unsigned i = 0;
i < BytesLeft;
i++) {
11531 IsThumb1, IsThumb2);
11533 IsThumb1, IsThumb2);
11538 MI.eraseFromParent();
11550 "__chkstk is only supported on Windows");
11551 assert(Subtarget->
isThumb2() &&
"Windows on ARM requires Thumb-2 mode");
11571 switch (
TM.getCodeModel()) {
11613 MI.eraseFromParent();
11637 .
addReg(
MI.getOperand(0).getReg())
11645 MI.eraseFromParent();
11669 if (miI ==
BB->end()) {
11671 if (Succ->isLiveIn(ARM::CPSR))
11677 SelectItr->addRegisterKilled(ARM::CPSR,
TRI);
11690 BuildMI(TpEntry, Dl,
TII->get(ARM::t2ADDri), AddDestReg)
11697 BuildMI(TpEntry, Dl,
TII->get(ARM::t2LSRri), LsrDestReg)
11704 BuildMI(TpEntry, Dl,
TII->get(ARM::t2WhileLoopSetup), TotalIterationsReg)
11707 BuildMI(TpEntry, Dl,
TII->get(ARM::t2WhileLoopStart))
11708 .
addUse(TotalIterationsReg)
11715 return TotalIterationsReg;
11726 Register TotalIterationsReg,
bool IsMemcpy) {
11753 Register RemainingLoopIterationsReg =
11756 .
addUse(TotalIterationsReg)
11758 .
addUse(RemainingLoopIterationsReg)
11765 .
addUse(ElementCountReg)
11767 .
addUse(RemainingElementsReg)
11772 BuildMI(TpLoopBody, Dl,
TII->get(ARM::MVE_VCTP8), VccrReg)
11773 .
addUse(PredCounterPhiReg)
11778 BuildMI(TpLoopBody, Dl,
TII->get(ARM::t2SUBri), RemainingElementsReg)
11779 .
addUse(PredCounterPhiReg)
11788 BuildMI(TpLoopBody, Dl,
TII->get(ARM::MVE_VLDRBU8_post))
11797 SrcValueReg = OpSrcReg;
11799 BuildMI(TpLoopBody, Dl,
TII->get(ARM::MVE_VSTRBU8_post))
11810 BuildMI(TpLoopBody, Dl,
TII->get(ARM::t2LoopDec), RemainingLoopIterationsReg)
11811 .
addUse(LoopCounterPhiReg)
11814 BuildMI(TpLoopBody, Dl,
TII->get(ARM::t2LoopEnd))
11815 .
addUse(RemainingLoopIterationsReg)
11828 bool isThumb2 = Subtarget->
isThumb2();
11829 switch (
MI.getOpcode()) {
11836 case ARM::tLDR_postidx: {
11840 .
add(
MI.getOperand(2))
11841 .
add(
MI.getOperand(3))
11842 .
add(
MI.getOperand(4))
11843 .
add(
MI.getOperand(0))
11845 MI.eraseFromParent();
11849 case ARM::MVE_MEMCPYLOOPINST:
11850 case ARM::MVE_MEMSETLOOPINST: {
11880 Register OpDestReg =
MI.getOperand(0).getReg();
11881 Register OpSrcReg =
MI.getOperand(1).getReg();
11882 Register OpSizeReg =
MI.getOperand(2).getReg();
11901 TpExit =
BB->splitAt(
MI,
false);
11902 if (TpExit ==
BB) {
11903 assert(
BB->canFallThrough() &&
"Exit Block must be Fallthrough of the "
11904 "block containing memcpy/memset Pseudo");
11905 TpExit =
BB->getFallThrough();
11909 TpExit =
BB->splitAt(
MI,
false);
11917 bool IsMemcpy =
MI.getOpcode() == ARM::MVE_MEMCPYLOOPINST;
11919 OpDestReg, OpSizeReg, TotalIterationsReg, IsMemcpy);
11934 MI.eraseFromParent();
11944 case ARM::t2STR_preidx:
11945 MI.setDesc(
TII->get(ARM::t2STR_PRE));
11947 case ARM::t2STRB_preidx:
11948 MI.setDesc(
TII->get(ARM::t2STRB_PRE));
11950 case ARM::t2STRH_preidx:
11951 MI.setDesc(
TII->get(ARM::t2STRH_PRE));
11954 case ARM::STRi_preidx:
11955 case ARM::STRBi_preidx: {
11956 unsigned NewOpc =
MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
11957 : ARM::STRB_PRE_IMM;
11959 unsigned Offset =
MI.getOperand(4).getImm();
11967 .
add(
MI.getOperand(0))
11968 .
add(
MI.getOperand(1))
11969 .
add(
MI.getOperand(2))
11971 .
add(
MI.getOperand(5))
11972 .
add(
MI.getOperand(6))
11974 MI.eraseFromParent();
11977 case ARM::STRr_preidx:
11978 case ARM::STRBr_preidx:
11979 case ARM::STRH_preidx: {
11981 switch (
MI.getOpcode()) {
11983 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG;
break;
11984 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG;
break;
11985 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE;
break;
11990 MI.eraseFromParent();
11994 case ARM::tMOVCCr_pseudo: {
12012 F->insert(It, copy0MBB);
12013 F->insert(It, sinkMBB);
12017 if (!
MI.killsRegister(ARM::CPSR) &&
12028 BB->addSuccessor(copy0MBB);
12029 BB->addSuccessor(sinkMBB);
12033 .
addImm(
MI.getOperand(3).getImm())
12034 .
addReg(
MI.getOperand(4).getReg());
12042 BB->addSuccessor(sinkMBB);
12049 .
addReg(
MI.getOperand(1).getReg())
12051 .
addReg(
MI.getOperand(2).getReg())
12054 MI.eraseFromParent();
12059 case ARM::BCCZi64: {
12065 bool RHSisZero =
MI.getOpcode() == ARM::BCCZi64;
12070 BuildMI(
BB, dl,
TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12074 BuildMI(
BB, dl,
TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12080 BuildMI(
BB, dl,
TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12084 BuildMI(
BB, dl,
TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12094 BuildMI(
BB, dl,
TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
12103 MI.eraseFromParent();
12107 case ARM::Int_eh_sjlj_setjmp:
12108 case ARM::Int_eh_sjlj_setjmp_nofp:
12109 case ARM::tInt_eh_sjlj_setjmp:
12110 case ARM::t2Int_eh_sjlj_setjmp:
12111 case ARM::t2Int_eh_sjlj_setjmp_nofp:
12114 case ARM::Int_eh_sjlj_setup_dispatch:
12115 EmitSjLjDispatchBlock(
MI,
BB);
12138 Fn->
insert(BBI, SinkBB);
12140 Register ABSSrcReg =
MI.getOperand(1).getReg();
12141 Register ABSDstReg =
MI.getOperand(0).getReg();
12142 bool ABSSrcKIll =
MI.getOperand(1).isKill();
12143 bool isThumb2 = Subtarget->
isThumb2();
12148 isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
12155 BB->addSuccessor(RSBBB);
12156 BB->addSuccessor(SinkBB);
12162 BuildMI(
BB, dl,
TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12169 TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).
addMBB(SinkBB)
12176 TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
12190 MI.eraseFromParent();
12195 case ARM::COPY_STRUCT_BYVAL_I32:
12197 return EmitStructByval(
MI,
BB);
12199 return EmitLowered__chkstk(
MI,
BB);
12201 return EmitLowered__dbzchk(
MI,
BB);
12218 if (!Node->hasAnyUseOfValue(0)) {
12219 MI.getOperand(0).setIsDead(
true);
12221 if (!Node->hasAnyUseOfValue(1)) {
12222 MI.getOperand(1).setIsDead(
true);
12226 for (
unsigned I = 0;
I !=
MI.getOperand(4).getImm(); ++
I) {
12228 : &ARM::GPRRegClass);
12253 MCID = &
TII->get(NewOpc);
12256 MI.getDesc().getNumOperands() + 5 -
MI.getDesc().getSize()
12257 &&
"converted opcode should be the same except for cc_out"
12258 " (and, on Thumb1, pred)");
12268 MI.addOperand(
MI.getOperand(1));
12269 MI.removeOperand(1);
12273 for (
unsigned i =
MI.getNumOperands();
i--;) {
12275 if (
op.isReg() &&
op.isUse()) {
12278 MI.tieOperands(DefIdx,
i);
12292 if (!
MI.hasOptionalDef() || !MCID->
operands()[ccOutIdx].isOptionalDef()) {
12293 assert(!NewOpc &&
"Optional cc_out operand required");
12298 bool definesCPSR =
false;
12299 bool deadCPSR =
false;
12304 definesCPSR =
true;
12307 MI.removeOperand(
i);
12311 if (!definesCPSR) {
12312 assert(!NewOpc &&
"Optional cc_out operand required");
12315 assert(deadCPSR == !Node->hasAnyUseOfValue(1) &&
"inconsistent dead flag");
12317 assert(!
MI.getOperand(ccOutIdx).getReg() &&
12318 "expect uninitialized optional cc_out operand");
12356 switch (
N->getOpcode()) {
12357 default:
return false;
12359 CC =
N->getOperand(0);
12381 EVT VT =
N->getValueType(0);
12382 CC =
N->getOperand(0);
12427 bool AllOnes =
false) {
12429 EVT VT =
N->getValueType(0);
12432 bool SwapSelectOps;
12434 NonConstantVal, DAG))
12440 OtherOp, NonConstantVal);
12485 if (!
N->getValueType(0).is64BitVector())
12493 EVT VT =
N->getValueType(0);
12496 Ops.push_back(DAG.
getConstant(Intrinsic::arm_neon_vpadd, dl,
12532 EVT VT =
N->getValueType(0);
12538 Opcode = Intrinsic::arm_neon_vpaddls;
12540 Opcode = Intrinsic::arm_neon_vpaddlu;
12568 EVT VT =
N->getValueType(0);
12583 unsigned nextIndex = 0;
12606 ||
C1->getZExtValue() != nextIndex+1)
12630 Ops.push_back(DAG.
getConstant(Intrinsic::arm_neon_vpaddls, dl,
12634 Ops.push_back(Vec);
12688 if (
auto Const = dyn_cast<ConstantSDNode>(
SRA.getOperand(1))) {
12689 if (Const->getZExtValue() != 31)
12694 if (
SRA.getOperand(0) !=
Mul)
12698 SDLoc dl(AddcNode);
12699 unsigned Opcode = 0;
12734 SDValue resNode(AddcNode, 0);
12763 "Expect an ADDE or SUBE");
12767 "ADDE node has the wrong inputs");
12786 "Expect ADDC with two result values. First: i32");
12806 bool IsLeftOperandMUL =
false;
12811 IsLeftOperandMUL =
true;
12822 SDValue *LowAddSub =
nullptr;
12825 if ((AddeSubeOp0 != MULOp.
getValue(1)) && (AddeSubeOp1 != MULOp.
getValue(1)))
12828 if (IsLeftOperandMUL)
12829 HiAddSub = &AddeSubeOp1;
12831 HiAddSub = &AddeSubeOp0;
12836 if (AddcSubcOp0 == MULOp.
getValue(0)) {
12837 LoMul = &AddcSubcOp0;
12838 LowAddSub = &AddcSubcOp1;
12840 if (AddcSubcOp1 == MULOp.
getValue(0)) {
12841 LoMul = &AddcSubcOp1;
12842 LowAddSub = &AddcSubcOp0;
12850 if (AddcSubcNode == HiAddSub->getNode() ||
12866 if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->
useMulOps() &&
12871 Ops.push_back(*HiAddSub);
12880 return SDValue(AddeSubeNode, 0);
12887 Ops.push_back(*LowAddSub);
12888 Ops.push_back(*HiAddSub);
12901 return SDValue(AddeSubeNode, 0);
12913 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
12922 SDNode *UmlalNode =
nullptr;
12961 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
12966 SDNode* AddcNode =
N->getOperand(2).getNode();
12967 SDNode* AddeNode =
N->getOperand(3).getNode();
12975 {N->getOperand(0), N->getOperand(1),
12976 AddcNode->getOperand(0), AddcNode->getOperand(1)});
13000 int32_t imm =
C->getSExtValue();
13006 return DAG.
getNode(Opcode,
DL,
N->getVTList(),
N->getOperand(0),
RHS);
13021 int64_t imm =
C->getSExtValue();
13032 return DAG.
getNode(Opcode,
DL,
N->getVTList(),
13033 N->getOperand(0),
RHS,
N->getOperand(2));
13045 if (!Subtarget->hasMVEIntegerOps())
13058 SetCC =
N->getOperand(0);
13061 CC = cast<CondCodeSDNode>(SetCC->
getOperand(2))->get();
13065 LHS =
N->getOperand(0);
13066 RHS =
N->getOperand(1);
13067 CC = cast<CondCodeSDNode>(
N->getOperand(4))->get();
13074 unsigned int Opcode = 0;
13103 switch (
TrueVal->getOpcode()) {
13125 EVT LeftType =
LHS->getValueType(0);
13126 EVT RightType =
RHS->getValueType(0);
13129 if (LeftType != VectorScalarType || RightType != VectorScalarType)
13154 EVT VT =
N->getValueType(0);
13162 Shft =
N->getOperand(0);
13168 cast<CondCodeSDNode>(Cmp.getOperand(2))->get() !=
ISD::SETLT ||
13169 Cmp.getOperand(0) !=
N->getOperand(1) ||
13170 Cmp.getOperand(1) !=
N->getOperand(2))
13172 Shft =
N->getOperand(1);
13187 case (1 << 15) - 1:
13191 case (1ULL << 31) - 1:
13223 unsigned LegalLanes = 128 / (ShftAmt + 1);
13247 for (
unsigned I = 0;
I < NumParts; ++
I) {
13264 if (!Subtarget->hasMVEIntegerOps())
13279 if (
N->getOperand(0).getOpcode() !=
ISD::XOR)
13289 if (!Const || !Const->isOne())
13307 EVT VT =
N->getValueType(0);
13309 if (!Subtarget->hasMVEIntegerOps() ||
13338 Opc = Intrinsic::arm_mve_vctp64;
13341 Opc = Intrinsic::arm_mve_vctp32;
13344 Opc = Intrinsic::arm_mve_vctp16;
13347 Opc = Intrinsic::arm_mve_vctp8;
13413 EVT VT =
N->getValueType(0);
13419 switch (
Op.getOpcode()) {
13436 !isa<ConstantSDNode>(N0) && N1->
hasOneUse()) {
13444 unsigned N0RedOp = 0;
13451 unsigned N1RedOp = 0;
13465 if (
SDValue R = DistrubuteAddAddVecReduce(N0, N1))
13467 if (
SDValue R = DistrubuteAddAddVecReduce(N1, N0))
13474 auto DistrubuteVecReduceLoad = [&](
SDValue N0,
SDValue N1,
bool IsForward) {
13488 LoadSDNode *Load0 = dyn_cast<LoadSDNode>(N0);
13489 LoadSDNode *Load1 = dyn_cast<LoadSDNode>(N1);
13498 if (!BaseLocDecomp0.getBase() ||
13499 BaseLocDecomp0.getBase() != BaseLocDecomp1.getBase() ||
13500 !BaseLocDecomp0.hasValidOffset() || !BaseLocDecomp1.hasValidOffset())
13502 if (BaseLocDecomp0.getOffset() < BaseLocDecomp1.getOffset())
13504 if (BaseLocDecomp0.getOffset() > BaseLocDecomp1.getOffset())
13514 if (IsBefore < 0) {
13517 }
else if (IsBefore > 0) {
13530 }
else if (IsForward && IsVecReduce(N0) && IsVecReduce(N1) &&
13540 if (!IsVecReduce(N0) || !IsVecReduce(N1))
13543 if (IsKnownOrderedLoad(N1.getOperand(0), N0.
getOperand(0)) >= 0)
13550 if (
SDValue R = DistrubuteVecReduceLoad(N0, N1,
true))
13552 if (
SDValue R = DistrubuteVecReduceLoad(N1, N0,
false))
13559 if (!Subtarget->hasMVEIntegerOps())
13565 EVT VT =
N->getValueType(0);
13581 auto MakeVecReduce = [&](
unsigned Opcode,
unsigned OpcodeA,
SDValue NA,
13603 unsigned S = VecRed->
getOpcode() == OpcodeA ? 2 : 0;
13652 "Expected shift op");
13670 if (
auto *Const = dyn_cast<ConstantSDNode>(N1->
getOperand(1))) {
13671 if (Const->getAPIntValue().ult(256))
13674 Const->getAPIntValue().sgt(-256))
13690 (
N->getOperand(0).getOpcode() ==
ISD::SHL ||
13691 N->getOperand(0).getOpcode() ==
ISD::SRL) &&
13692 "Expected XOR(SHIFT) pattern");
13695 auto *XorC = dyn_cast<ConstantSDNode>(
N->getOperand(1));
13696 auto *ShiftC = dyn_cast<ConstantSDNode>(
N->getOperand(0).getOperand(1));
13697 if (XorC && ShiftC) {
13698 unsigned MaskIdx, MaskLen;
13699 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
13700 unsigned ShiftAmt = ShiftC->getZExtValue();
13701 unsigned BitWidth =
N->getValueType(0).getScalarSizeInBits();
13702 if (
N->getOperand(0).getOpcode() ==
ISD::SHL)
13703 return MaskIdx == ShiftAmt && MaskLen == (
BitWidth - ShiftAmt);
13704 return MaskIdx == 0 && MaskLen == (
BitWidth - ShiftAmt);
13714 N->getOperand(0).getOpcode() ==
ISD::SRL) ||
13716 N->getOperand(0).getOpcode() ==
ISD::SHL)) &&
13717 "Expected shift-shift mask");
13729 if (!Subtarget->hasNEON()) {
13748 return Subtarget->hasFP64();
13751 return Subtarget->hasMVEFloatOps();
13780 if (
ST->isThumb() &&
ST->isThumb1Only())
13784 for (
auto *U :
N->uses()) {
13798 if (isa<ConstantSDNode>(U->
getOperand(0)) ||
13814 if (
N->getOperand(0).getOpcode() !=
ISD::SHL)
13819 auto *C1ShlC2 = dyn_cast<ConstantSDNode>(
N->getOperand(1));
13820 auto *C2 = dyn_cast<ConstantSDNode>(
SHL.getOperand(1));
13821 if (!C1ShlC2 || !C2)
13824 APInt C2Int = C2->getAPIntValue();
13825 APInt C1Int = C1ShlC2->getAPIntValue();
13827 if (C2Int.
uge(C2Width))
13833 if ((C1Int &
Mask) != C1Int)
13840 auto LargeImm = [](
const APInt &
Imm) {
13841 unsigned Zeros =
Imm.countLeadingZeros() +
Imm.countTrailingZeros();
13842 return Imm.getBitWidth() - Zeros > 8;
13845 if (LargeImm(C1Int) || LargeImm(C2Int))
13857 SHL.dump();
N->dump());
13901 CSINC.getOperand(0)),
13903 CSINC.getOperand(3));
13922 if (!Subtarget->hasMVEIntegerOps() || !
N->getValueType(0).isVector())
13964 if (!Subtarget->hasVMLxForwarding())
13983 EVT VT =
N->getValueType(0);
13994 EVT VT =
N->getValueType(0);
14004 EVT VT = cast<VTSDNode>(
Op->getOperand(1))->getVT();
14006 return Op->getOperand(0);
14020 And =
And->getOperand(0);
14034 return And->getOperand(0);
14039 if (
SDValue Op0 = IsSignExt(N0)) {
14040 if (
SDValue Op1 = IsSignExt(N1)) {
14046 if (
SDValue Op0 = IsZeroExt(N0)) {
14047 if (
SDValue Op1 = IsZeroExt(N1)) {
14062 EVT VT =
N->getValueType(0);
14063 if (Subtarget->hasMVEIntegerOps() && VT ==
MVT::v2i64)
14081 int64_t MulAmt =
C->getSExtValue();
14082 unsigned ShiftAmt = llvm::countr_zero<uint64_t>(MulAmt);
14084 ShiftAmt = ShiftAmt & (32 - 1);
14089 MulAmt >>= ShiftAmt;
14092 if (llvm::has_single_bit<uint32_t>(MulAmt - 1)) {
14100 }
else if (llvm::has_single_bit<uint32_t>(MulAmt + 1)) {
14112 if (llvm::has_single_bit<uint32_t>(MulAmtAbs + 1)) {
14120 }
else if (llvm::has_single_bit<uint32_t>(MulAmtAbs - 1)) {
14159 if (
C1 == 255 ||
C1 == 65535)
14162 SDNode *N0 =
N->getOperand(0).getNode();
14176 if (!C2 || C2 >= 32)
14220 if (Trailing == C2 && C2 + C3 < 32) {
14233 if (Leading == C2 && C2 + C3 < 32) {
14253 EVT VT =
N->getValueType(0);
14260 APInt SplatBits, SplatUndef;
14261 unsigned SplatBitSize;
14263 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14264 BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14265 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14266 SplatBitSize == 64) {
14300 if (!Subtarget->hasV6Ops() ||
14301 (Subtarget->isThumb() &&
14302 (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
14309 SRL =
OR->getOperand(1);
14310 SHL =
OR->getOperand(0);
14317 if ((
SRL.getOperand(0).getNode() !=
SHL.getOperand(0).getNode()) ||
14321 SDNode *SMULLOHI =
SRL.getOperand(0).getNode();
14322 if (
SRL.getOperand(0) !=
SDValue(SMULLOHI, 0) ||
14342 unsigned Opcode = 0;
14343 if (
isS16(OpS16, DAG))
14361 if (Subtarget->
isThumb1Only() || !Subtarget->hasV6T2Ops())
14364 EVT VT =
N->getValueType(0);
14392 if (
Mask == 0xffff)
14399 if ((Val & ~
Mask) != Val)
14424 (
Mask == ~Mask2)) {
14427 if (Subtarget->hasDSP() &&
14428 (
Mask == 0xffff ||
Mask == 0xffff0000))
14441 (~
Mask == Mask2)) {
14444 if (Subtarget->hasDSP() &&
14445 (Mask2 == 0xffff || Mask2 == 0xffff0000))
14466 unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
14518 EVT VT =
N->getValueType(0);
14523 auto IsFreelyInvertable = [&](
SDValue V) {
14530 if (!(IsFreelyInvertable(N0) || IsFreelyInvertable(N1)))
14546 EVT VT =
N->getValueType(0);
14556 APInt SplatBits, SplatUndef;
14557 unsigned SplatBitSize;
14559 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14560 BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14561 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14562 SplatBitSize == 64) {
14598 unsigned SplatBitSize;
14601 APInt SplatBits0, SplatBits1;
14605 if (BVN0 && BVN0->
isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
14606 HasAnyUndefs) && !HasAnyUndefs) {
14607 if (BVN1 && BVN1->
isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
14608 HasAnyUndefs) && !HasAnyUndefs) {
14613 SplatBits0 == ~SplatBits1) {
14643 EVT VT =
N->getValueType(0);
14658 if (Subtarget->hasMVEIntegerOps()) {
14689 ToMask = ~cast<ConstantSDNode>(
N->getOperand(2))->getAPIntValue();
14695 isa<ConstantSDNode>(
From->getOperand(1))) {
14696 APInt Shift = cast<ConstantSDNode>(
From->getOperand(1))->getAPIntValue();
14697 assert(
Shift.getLimitedValue() < 32 &&
"Shift too large!");
14698 FromMask <<=
Shift.getLimitedValue(31);
14709 unsigned LastActiveBitInA = A.countTrailingZeros();
14710 unsigned FirstActiveBitInB =
B.getBitWidth() -
B.countLeadingZeros() - 1;
14711 return LastActiveBitInA - 1 == FirstActiveBitInB;
14716 APInt ToMask, FromMask;
14724 APInt NewToMask, NewFromMask;
14726 if (NewFrom !=
From)
14730 if ((NewToMask & ToMask).getBoolValue())
14755 unsigned InvMask = cast<ConstantSDNode>(
N->getOperand(2))->getZExtValue();
14757 unsigned Width = llvm::bit_width<unsigned>(~InvMask) - LSB;
14759 static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
14760 "undefined behavior");
14763 if ((
Mask & (~Mask2)) == 0)
14765 N->getOperand(0), N1.
getOperand(0),
N->getOperand(2));
14772 APInt ToMask1, FromMask1;
14775 APInt ToMask2, FromMask2;
14781 APInt NewFromMask = FromMask1 | FromMask2;
14782 APInt NewToMask = ToMask1 | ToMask2;
14784 EVT VT =
N->getValueType(0);
14787 if (NewFromMask[0] == 0)
14800 APInt ToMask1 = ~
N->getConstantOperandAPInt(2);
14803 if (!N0.
hasOneUse() || (ToMask1 & ToMask2) != 0 ||
14807 EVT VT =
N->getValueType(0);
14810 N->getOperand(1),
N->getOperand(2));
14824 SDValue CSInc = Cmp->getOperand(0);
14874 if (
N->getConstantOperandVal(2) ==
ARMCC::EQ)
14878 if (
N->getConstantOperandVal(2) ==
ARMCC::NE)
14893 SDValue InDouble =
N->getOperand(0);
14902 !cast<LoadSDNode>(InNode)->isVolatile()) {
14911 LD->getAlign(),
LD->getMemOperand()->getFlags());
14917 LD->getPointerInfo().getWithOffset(4),
14919 LD->getMemOperand()->getFlags());
14931 isa<ConstantSDNode>(InDouble.
getOperand(1))) {
14952 if (!Subtarget->
isLittle() && BVSwap)
14962 if (isa<ConstantSDNode>(BV.
getOperand(2))) {
14970 if (!Subtarget->
isLittle() && BVSwap)
15013 if (Copy.getValueType() ==
MVT::f32 &&
15015 SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1)};
15023 if (
LoadSDNode *LN0 = dyn_cast<LoadSDNode>(Op0)) {
15024 if (LN0->hasOneUse() && LN0->isUnindexed() &&
15028 LN0->getBasePtr(), LN0->getMemOperand());
15046 EVT VT =
N->getValueType(0);
15080 unsigned NumElts =
N->getValueType(0).getVectorNumElements();
15081 for (
unsigned i = 0;
i < NumElts; ++
i) {
15082 SDNode *Elt =
N->getOperand(
i).getNode();
15099 if (
N->getNumOperands() == 2)
15105 EVT VT =
N->getValueType(0);
15111 for (
unsigned i = 0;
i < NumElts; ++
i) {
15137 EVT VT =
N->getValueType(0);
15150 Use->getValueType(0).isFloatingPoint())
15158 unsigned NumOfBitCastedElts = 0;
15160 unsigned NumOfRelevantElts = NumElts;
15161 for (
unsigned Idx = 0; Idx < NumElts; ++Idx) {
15166 ++NumOfBitCastedElts;
15167 }
else if (Elt.
isUndef() || isa<ConstantSDNode>(Elt))
15170 --NumOfRelevantElts;
15174 if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
15192 for (
unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
15216 EVT VT =
N->getValueType(0);
15223 if (
Op->getOperand(0).getValueType() == VT)
15224 return Op->getOperand(0);
15250 EVT VT =
N->getValueType(0);
15255 if (
ST->isLittle())
15265 if (
Op->getOperand(0).getValueType() == VT)
15266 return Op->getOperand(0);
15275 if (!Subtarget->hasMVEIntegerOps())
15278 EVT VT =
N->getValueType(0);
15310 EVT VT =
N->getValueType(0);
15311 SDNode *Elt =
N->getOperand(1).getNode();
15326 Vec, V,
N->getOperand(2));
15336 EVT VT =
N->getValueType(0);
15346 Ext =
Ext.getOperand(0);
15348 !isa<ConstantSDNode>(
Ext.getOperand(1)) ||
15349 Ext.getConstantOperandVal(1) % 2 != 0)
15351 if (
Ext->use_size() == 1 &&
15359 unsigned Lane =
Ext.getConstantOperandVal(1);
15365 return V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15366 isa<ConstantSDNode>(V->getOperand(1)) &&
15367 V->getConstantOperandVal(1) == Lane + 1 &&
15368 V->getOperand(0).getResNo() == ResNo;
15370 if (OtherIt == Op0->
uses().end())
15375 SDValue OtherExt(*OtherIt, 0);
15400 EVT VT =
N->getValueType(0);
15413 while (
X.getValueType() != VT &&
X->getOpcode() ==
ISD::BITCAST)
15414 X =
X->getOperand(0);
15415 if (
X.getValueType() == VT)
15421 isa<ConstantSDNode>(
N->getOperand(1)) &&
15423 return Op0.
getOperand(
N->getConstantOperandVal(1));
15428 isa<ConstantSDNode>(
N->getOperand(1)) &&
15433 unsigned Offset =
N->getConstantOperandVal(1);
15445 unsigned Idx =
N->getConstantOperandVal(1);
15459 EVT VT =
N->getValueType(0);
15463 cast<VTSDNode>(
N->getOperand(1))->getVT() ==
15464 Op.getOperand(0).getValueType().getScalarType())
15474 SDValue SubVec =
N->getOperand(1);
15475 uint64_t IdxVal =
N->getConstantOperandVal(2);
15486 if (IdxVal == 0 && Vec.
isUndef())
15492 (IdxVal != 0 && IdxVal != NumSubElts))
15565 EVT VT =
N->getValueType(0);
15576 unsigned HalfElts = NumElts/2;
15578 for (
unsigned n = 0;
n < NumElts; ++
n) {
15581 if (MaskElt < (
int)HalfElts)
15583 else if (MaskElt >= (
int)NumElts && MaskElt < (
int)(NumElts + HalfElts))
15584 NewElt = HalfElts + MaskElt - NumElts;
15585 NewMask.push_back(NewElt);
15611 bool SimpleConstIncOnly,
15619 bool isLoadOp =
true;
15620 bool isLaneOp =
false;
15623 bool hasAlignment =
true;
15624 unsigned NewOpc = 0;
15625 unsigned NumVecs = 0;
15626 if (
Target.isIntrinsic) {
15627 unsigned IntNo = cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue();
15631 case Intrinsic::arm_neon_vld1:
15635 case Intrinsic::arm_neon_vld2:
15639 case Intrinsic::arm_neon_vld3:
15643 case Intrinsic::arm_neon_vld4:
15647 case Intrinsic::arm_neon_vld1x2:
15650 hasAlignment =
false;
15652 case Intrinsic::arm_neon_vld1x3:
15655 hasAlignment =
false;
15657 case Intrinsic::arm_neon_vld1x4:
15660 hasAlignment =
false;
15662 case Intrinsic::arm_neon_vld2dup:
15666 case Intrinsic::arm_neon_vld3dup:
15670 case Intrinsic::arm_neon_vld4dup:
15674 case Intrinsic::arm_neon_vld2lane:
15679 case Intrinsic::arm_neon_vld3lane:
15684 case Intrinsic::arm_neon_vld4lane:
15689 case Intrinsic::arm_neon_vst1:
15694 case Intrinsic::arm_neon_vst2:
15699 case Intrinsic::arm_neon_vst3:
15704 case Intrinsic::arm_neon_vst4:
15709 case Intrinsic::arm_neon_vst2lane:
15715 case Intrinsic::arm_neon_vst3lane:
15721 case Intrinsic::arm_neon_vst4lane:
15727 case Intrinsic::arm_neon_vst1x2:
15731 hasAlignment =
false;
15733 case Intrinsic::arm_neon_vst1x3:
15737 hasAlignment =
false;
15739 case Intrinsic::arm_neon_vst1x4:
15743 hasAlignment =
false;
15748 switch (
N->getOpcode()) {
15784 VecTy =
N->getValueType(0);
15785 }
else if (
Target.isIntrinsic) {
15786 VecTy =
N->getOperand(
Target.AddrOpIdx + 1).getValueType();
15789 "Node has to be a load, a store, or an intrinsic!");
15790 VecTy =
N->getOperand(1).getValueType();
15798 if (isLaneOp || isVLDDUPOp)
15801 if (NumBytes >= 3 * 16 &&
User.ConstInc != NumBytes) {
15807 if (SimpleConstIncOnly &&
User.ConstInc != NumBytes)
15813 EVT AlignedVecTy = VecTy;
15830 if (isa<LSBaseSDNode>(
N)) {
15833 assert(NumVecs == 1 &&
"Unexpected multi-element generic load/store.");
15834 assert(!isLaneOp &&
"Unexpected generic load/store lane.");
15845 Alignment =
Align(1);
15851 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
15853 for (
n = 0;
n < NumResultVecs; ++
n)
15854 Tys[
n] = AlignedVecTy;
15861 Ops.push_back(
N->getOperand(0));
15862 Ops.push_back(
N->getOperand(
Target.AddrOpIdx));
15863 Ops.push_back(
User.Inc);
15867 Ops.push_back(StN->getValue());
15871 unsigned LastOperand =
15872 hasAlignment ?
N->getNumOperands() - 1 :
N->getNumOperands();
15873 for (
unsigned i =
Target.AddrOpIdx + 1;
i < LastOperand; ++
i)
15874 Ops.push_back(
N->getOperand(
i));
15882 if (AlignedVecTy != VecTy &&
N->getOpcode() ==
ISD::STORE) {
15883 SDValue &StVal = Ops[Ops.size() - 2];
15893 for (
unsigned i = 0;
i < NumResultVecs; ++
i)
15898 if (AlignedVecTy != VecTy &&
N->getOpcode() ==
ISD::LOAD) {
15899 SDValue &LdVal = NewResults[0];
15903 NewResults.push_back(
SDValue(UpdN.
getNode(), NumResultVecs + 1));
15935 switch (
N->getOpcode()) {
15938 if (isa<ConstantSDNode>(
N->getOperand(1))) {
15939 *
Ptr =
N->getOperand(0);
15940 *CInc =
N->getOperand(1);
15946 if (isa<ConstantSDNode>(
N->getOperand(2))) {
15947 *
Ptr =
N->getOperand(1);
15948 *CInc =
N->getOperand(2);
15965 Worklist.push_back(
N);
15966 Worklist.push_back(
User);
15983 const unsigned AddrOpIdx = ((isIntrinsic ||
isStore) ? 2 : 1);
15992 UE =
Addr.getNode()->use_end(); UI != UE; ++UI) {
15994 if (UI.getUse().getResNo() !=
Addr.getResNo() ||
15999 unsigned ConstInc =
16003 BaseUpdates.push_back({
User, Inc, ConstInc});
16017 if (UI.getUse().getResNo() !=
Base.getResNo() ||
User ==
Addr.getNode() ||
16022 unsigned UserOffset =
16025 if (!UserOffset || UserOffset <=
Offset)
16028 unsigned NewConstInc = UserOffset -
Offset;
16030 BaseUpdates.push_back({
User, NewInc, NewConstInc});
16038 unsigned NumValidUpd = BaseUpdates.size();
16039 for (
unsigned I = 0;
I < NumValidUpd;) {
16043 std::swap(BaseUpdates[
I], BaseUpdates[NumValidUpd]);
16051 BaseUpdates.
resize(NumValidUpd);
16058 return LHS.ConstInc < RHS.ConstInc;
16087 unsigned IntNo = cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue();
16088 if (IntNo == Intrinsic::arm_mve_vst2q &&
16089 cast<ConstantSDNode>(
N->getOperand(5))->getZExtValue() != 1)
16091 if (IntNo == Intrinsic::arm_mve_vst4q &&
16092 cast<ConstantSDNode>(
N->getOperand(7))->getZExtValue() != 3)
16097 UE =
Addr.getNode()->use_end();
16101 UI.getUse().getResNo() !=
Addr.getResNo())
16110 Worklist.push_back(
N);
16111 Worklist.push_back(
User);
16117 bool isLoadOp =
true;
16118 unsigned NewOpc = 0;
16119 unsigned NumVecs = 0;
16123 case Intrinsic::arm_mve_vld2q:
16127 case Intrinsic::arm_mve_vld4q:
16131 case Intrinsic::arm_mve_vst2q:
16136 case Intrinsic::arm_mve_vst4q:
16146 VecTy =
N->getValueType(0);
16148 VecTy =
N->getOperand(3).getValueType();
16162 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16164 for (
n = 0;
n < NumResultVecs; ++
n)
16172 Ops.push_back(
N->getOperand(0));
16173 Ops.push_back(
N->getOperand(2));
16174 Ops.push_back(Inc);
16176 for (
unsigned i = 3;
i <
N->getNumOperands(); ++
i)
16177 Ops.push_back(
N->getOperand(
i));
16184 for (
unsigned i = 0;
i < NumResultVecs; ++
i)
16187 NewResults.push_back(
SDValue(UpdN.
getNode(), NumResultVecs + 1));
16203 EVT VT =
N->getValueType(0);
16209 SDNode *VLD =
N->getOperand(0).getNode();
16212 unsigned NumVecs = 0;
16213 unsigned NewOpc = 0;
16214 unsigned IntNo = cast<ConstantSDNode>(VLD->
getOperand(1))->getZExtValue();
16215 if (IntNo == Intrinsic::arm_neon_vld2lane) {
16218 }
else if (IntNo == Intrinsic::arm_neon_vld3lane) {
16221 }
else if (IntNo == Intrinsic::arm_neon_vld4lane) {
16230 unsigned VLDLaneNo =
16231 cast<ConstantSDNode>(VLD->
getOperand(NumVecs+3))->getZExtValue();
16235 if (UI.getUse().getResNo() == NumVecs)
16239 VLDLaneNo != cast<ConstantSDNode>(
User->
getOperand(1))->getZExtValue())
16246 for (
n = 0;
n < NumVecs; ++
n)
16259 unsigned ResNo = UI.getUse().
getResNo();
16261 if (ResNo == NumVecs)
16269 std::vector<SDValue> VLDDupResults;
16270 for (
unsigned n = 0;
n < NumVecs; ++
n)
16284 EVT VT =
N->getValueType(0);
16287 if (Subtarget->hasMVEIntegerOps()) {
16293 N->getOperand(0),
N->getOperand(1));
16305 Op =
Op.getOperand(0);
16310 unsigned EltSize =
Op.getScalarValueSizeInBits();
16312 unsigned Imm = cast<ConstantSDNode>(
Op.getOperand(0))->getZExtValue();
16328 if (Subtarget->hasMVEIntegerOps()) {
16339 if (!Subtarget->hasNEON())
16346 if (
LD &&
Op.hasOneUse() &&
LD->isUnindexed() &&
16347 LD->getMemoryVT() ==
N->getValueType(0).getVectorElementType()) {
16348 SDValue Ops[] = {
LD->getOperand(0),
LD->getOperand(1),
16353 LD->getMemoryVT(),
LD->getMemOperand());
16364 EVT VT =
N->getValueType(0);
16386 assert(StVT != VT &&
"Cannot truncate to the same type");
16396 if (0 != (NumElems * FromEltSz) % ToEltSz)
16399 unsigned SizeRatio = FromEltSz / ToEltSz;
16404 NumElems * SizeRatio);
16410 for (
unsigned i = 0;
i < NumElems; ++
i)
16426 if (TLI.
isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
16446 for (
unsigned I = 0;
I <
E;
I++) {
16454 Chains.push_back(Ch);
16480 unsigned NumElements = 4;
16497 unsigned Off0 = Rev ? NumElts : 0;
16498 unsigned Off1 = Rev ? 0 : NumElts;
16500 for (
unsigned I = 0;
I < NumElts;
I += 2) {
16501 if (
M[
I] >= 0 &&
M[
I] != (
int)(Off0 +
I / 2))
16503 if (
M[
I + 1] >= 0 &&
M[
I + 1] != (
int)(Off1 +
I / 2))
16511 if (isVMOVNShuffle(
Shuffle,
false) || isVMOVNShuffle(
Shuffle,
true))
16531 unsigned NewOffset =
i * NumElements * ToEltVT.
getSizeInBits() / 8;
16546 NewToVT, Alignment, MMOFlags, AAInfo);
16547 Stores.push_back(
Store);
16579 unsigned NewOffset =
16587 NewToVT, Alignment, MMOFlags, AAInfo);
16588 Stores.push_back(
Store);
16609 {Extract.getOperand(0), Extract.getOperand(1)});
16640 if (Subtarget->hasNEON())
16644 if (Subtarget->hasMVEIntegerOps()) {
16722 if (!Subtarget->hasNEON())
16726 if (!
Op.getValueType().isVector() || !
Op.getValueType().isSimple() ||
16731 if (!isa<BuildVectorSDNode>(ConstVec))
16734 MVT FloatTy =
Op.getSimpleValueType().getVectorElementType();
16736 MVT IntTy =
N->getSimpleValueType(0).getVectorElementType();
16738 unsigned NumLanes =
Op.getValueType().getVectorNumElements();
16739 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
16750 if (
C == -1 ||
C == 0 ||
C > 32)
16755 unsigned IntrinsicOpcode =
isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
16756 Intrinsic::arm_neon_vcvtfp2fxu;
16762 if (IntBits < FloatBits)
16770 if (!Subtarget->hasMVEFloatOps())
16778 EVT VT =
N->getValueType(0);
16783 auto isIdentitySplat = [&](
SDValue Op,
bool NSZ) {
16787 uint64_t ImmVal =
Op.getOperand(0).getConstantOperandVal(0);
16788 if (VT ==
MVT::v4f32 && (ImmVal == 1664 || (ImmVal == 0 && NSZ)))
16790 if (VT ==
MVT::v8f16 && (ImmVal == 2688 || (ImmVal == 0 && NSZ)))
16803 if (!isIdentitySplat(Op1.
getOperand(2), NSZ))
16822 if (!Subtarget->hasNEON())
16826 unsigned OpOpcode =
Op.getNode()->getOpcode();
16827 if (!
N->getValueType(0).isVector() || !
N->getValueType(0).isSimple() ||
16831 SDValue ConstVec =
N->getOperand(1);
16832 if (!isa<BuildVectorSDNode>(ConstVec))
16835 MVT FloatTy =
N->getSimpleValueType(0).getVectorElementType();
16837 MVT IntTy =
Op.getOperand(0).getSimpleValueType().getVectorElementType();
16839 unsigned NumLanes =
Op.getValueType().getVectorNumElements();
16840 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
16851 if (
C == -1 ||
C == 0 ||
C > 32)
16857 if (IntBits < FloatBits)
16862 unsigned IntrinsicOpcode =
isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
16863 Intrinsic::arm_neon_vcvtfxu2fp;
16872 if (!
ST->hasMVEIntegerOps())
16876 EVT ResVT =
N->getValueType(0);
16904 EVT AVT = A.getValueType();
16910 auto ExtendIfNeeded = [&](
SDValue A,
unsigned ExtendCode) {
16911 EVT AVT = A.getValueType();
16913 A = DAG.
getNode(ExtendCode, dl,
16919 auto IsVADDV = [&](
MVT RetTy,
unsigned ExtendCode,
ArrayRef<MVT> ExtTypes) {
16920 if (ResVT != RetTy || N0->
getOpcode() != ExtendCode)
16923 if (ExtTypeMatches(A, ExtTypes))
16924 return ExtendIfNeeded(A, ExtendCode);
16927 auto IsPredVADDV = [&](
MVT RetTy,
unsigned ExtendCode,
16934 if (
Ext->getOpcode() != ExtendCode)
16937 if (ExtTypeMatches(A, ExtTypes))
16938 return ExtendIfNeeded(A, ExtendCode);
16941 auto IsVMLAV = [&](
MVT RetTy,
unsigned ExtendCode,
ArrayRef<MVT> ExtTypes,
16951 if (ResVT != RetTy)
16966 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(
B, ExtTypes)) {
16967 A = ExtendIfNeeded(A, ExtendCode);
16968 B = ExtendIfNeeded(
B, ExtendCode);
16973 auto IsPredVMLAV = [&](
MVT RetTy,
unsigned ExtendCode,
ArrayRef<MVT> ExtTypes,
16998 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(
B, ExtTypes)) {
16999 A = ExtendIfNeeded(A, ExtendCode);
17000 B = ExtendIfNeeded(
B, ExtendCode);
17011 EVT VT = Ops[0].getValueType();
17014 "Unexpected illegal long reduction opcode");
17110 Op =
Op->getOperand(1);
17112 Op->getOperand(0)->getOpcode() ==
ISD::MUL) {
17131 unsigned VecOp =
N->getOperand(0).getValueType().isVector() ? 0 : 2;
17132 auto *Shuf = dyn_cast<ShuffleVectorSDNode>(
N->getOperand(VecOp));
17133 if (!Shuf || !Shuf->getOperand(1).isUndef())
17139 for (
int E :
Mask) {
17140 if (E < 0 || E >= (
int)
Mask.size())
17147 if (
N->getNumOperands() != VecOp + 1) {
17148 auto *Shuf2 = dyn_cast<ShuffleVectorSDNode>(
N->getOperand(VecOp + 1));
17149 if (!Shuf2 || !Shuf2->getOperand(1).isUndef() || Shuf2->getMask() !=
Mask)
17155 if (
Op.getValueType().isVector())
17156 Ops.push_back(
Op.getOperand(0));
17167 unsigned IsTop =
N->getConstantOperandVal(2);
17174 if (Op0->
isUndef() && !IsTop)
17188 unsigned NumElts =
N->getValueType(0).getVectorNumElements();
17190 APInt Op0DemandedElts =
17191 IsTop ? Op1DemandedElts
17206 unsigned IsTop =
N->getConstantOperandVal(2);
17208 unsigned NumElts =
N->getValueType(0).getVectorNumElements();
17209 APInt Op0DemandedElts =
17221 EVT VT =
N->getValueType(0);
17225 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(
LHS);
17226 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(
RHS);
17228 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
17229 LHS.getOperand(1).isUndef() &&
RHS.getOperand(1).isUndef() &&
17233 LHS.getOperand(0),
RHS.getOperand(0));
17247 if (
auto C = dyn_cast<ConstantSDNode>(
N->getOperand(2))) {
17248 int ShiftAmt =
C->getSExtValue();
17249 if (ShiftAmt == 0) {
17255 if (ShiftAmt >= -32 && ShiftAmt < 0) {
17256 unsigned NewOpcode =
17272 unsigned IntNo = cast<ConstantSDNode>(
N->getOperand(0))->getZExtValue();
17283 case Intrinsic::arm_neon_vshifts:
17284 case Intrinsic::arm_neon_vshiftu:
17285 case Intrinsic::arm_neon_vrshifts:
17286 case Intrinsic::arm_neon_vrshiftu:
17287 case Intrinsic::arm_neon_vrshiftn:
17288 case Intrinsic::arm_neon_vqshifts:
17289 case Intrinsic::arm_neon_vqshiftu:
17290 case Intrinsic::arm_neon_vqshiftsu:
17291 case Intrinsic::arm_neon_vqshiftns:
17292 case Intrinsic::arm_neon_vqshiftnu:
17293 case Intrinsic::arm_neon_vqshiftnsu:
17294 case Intrinsic::arm_neon_vqrshiftns:
17295 case Intrinsic::arm_neon_vqrshiftnu:
17296 case Intrinsic::arm_neon_vqrshiftnsu: {
17297 EVT VT =
N->getOperand(1).getValueType();
17299 unsigned VShiftOpc = 0;
17302 case Intrinsic::arm_neon_vshifts:
17303 case Intrinsic::arm_neon_vshiftu:
17308 if (
isVShiftRImm(
N->getOperand(2), VT,
false,
true, Cnt)) {
17315 case Intrinsic::arm_neon_vrshifts:
17316 case Intrinsic::arm_neon_vrshiftu:
17321 case Intrinsic::arm_neon_vqshifts:
17322 case Intrinsic::arm_neon_vqshiftu:
17327 case Intrinsic::arm_neon_vqshiftsu:
17332 case Intrinsic::arm_neon_vrshiftn:
17333 case Intrinsic::arm_neon_vqshiftns:
17334 case Intrinsic::arm_neon_vqshiftnu:
17335 case Intrinsic::arm_neon_vqshiftnsu:
17336 case Intrinsic::arm_neon_vqrshiftns:
17337 case Intrinsic::arm_neon_vqrshiftnu:
17338 case Intrinsic::arm_neon_vqrshiftnsu:
17350 case Intrinsic::arm_neon_vshifts:
17351 case Intrinsic::arm_neon_vshiftu:
17354 case Intrinsic::arm_neon_vrshifts:
17357 case Intrinsic::arm_neon_vrshiftu:
17360 case Intrinsic::arm_neon_vrshiftn:
17363 case Intrinsic::arm_neon_vqshifts:
17366 case Intrinsic::arm_neon_vqshiftu:
17369 case Intrinsic::arm_neon_vqshiftsu:
17372 case Intrinsic::arm_neon_vqshiftns:
17375 case Intrinsic::arm_neon_vqshiftnu:
17378 case Intrinsic::arm_neon_vqshiftnsu:
17381 case Intrinsic::arm_neon_vqrshiftns:
17384 case Intrinsic::arm_neon_vqrshiftnu:
17387 case Intrinsic::arm_neon_vqrshiftnsu:
17393 return DAG.
getNode(VShiftOpc, dl,
N->getValueType(0),
17397 case Intrinsic::arm_neon_vshiftins: {
17398 EVT VT =
N->getOperand(1).getValueType();
17400 unsigned VShiftOpc = 0;
17404 else if (
isVShiftRImm(
N->getOperand(3), VT,
false,
true, Cnt))
17411 return DAG.
getNode(VShiftOpc, dl,
N->getValueType(0),
17412 N->getOperand(1),
N->getOperand(2),
17416 case Intrinsic::arm_neon_vqrshifts:
17417 case Intrinsic::arm_neon_vqrshiftu:
17421 case Intrinsic::arm_mve_vqdmlah:
17422 case Intrinsic::arm_mve_vqdmlash:
17423 case Intrinsic::arm_mve_vqrdmlah:
17424 case Intrinsic::arm_mve_vqrdmlash:
17425 case Intrinsic::arm_mve_vmla_n_predicated:
17426 case Intrinsic::arm_mve_vmlas_n_predicated:
17427 case Intrinsic::arm_mve_vqdmlah_predicated:
17428 case Intrinsic::arm_mve_vqdmlash_predicated:
17429 case Intrinsic::arm_mve_vqrdmlah_predicated:
17430 case Intrinsic::arm_mve_vqrdmlash_predicated: {
17435 unsigned BitWidth =
N->getValueType(0).getScalarSizeInBits();
17442 case Intrinsic::arm_mve_minv:
17443 case Intrinsic::arm_mve_maxv:
17444 case Intrinsic::arm_mve_minav:
17445 case Intrinsic::arm_mve_maxav:
17446 case Intrinsic::arm_mve_minv_predicated:
17447 case Intrinsic::arm_mve_maxv_predicated:
17448 case Intrinsic::arm_mve_minav_predicated:
17449 case Intrinsic::arm_mve_maxav_predicated: {
17452 unsigned BitWidth =
N->getOperand(2)->getValueType(0).getScalarSizeInBits();
17459 case Intrinsic::arm_mve_addv: {
17462 bool Unsigned = cast<ConstantSDNode>(
N->getOperand(2))->getZExtValue();
17467 case Intrinsic::arm_mve_addlv:
17468 case Intrinsic::arm_mve_addlv_predicated: {
17471 bool Unsigned = cast<ConstantSDNode>(
N->getOperand(2))->getZExtValue();
17472 unsigned Opc = IntNo == Intrinsic::arm_mve_addlv ?
17477 for (
unsigned i = 1,
e =
N->getNumOperands();
i <
e;
i++)
17479 Ops.push_back(
N->getOperand(
i));
17500 EVT VT =
N->getValueType(0);
17503 N->getOperand(0)->getOpcode() ==
ISD::AND &&
17504 N->getOperand(0)->hasOneUse()) {
17512 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(
N->getOperand(1));
17521 if (AndMask == 255 || AndMask == 65535)
17525 if (MaskedBits > ShiftAmt) {
17540 if (
ST->hasMVEIntegerOps())
17545 switch (
N->getOpcode()) {
17558 if (
isVShiftRImm(
N->getOperand(1), VT,
false,
false, Cnt)) {
17559 unsigned VShiftOpc =
17562 return DAG.
getNode(VShiftOpc, dl, VT,
N->getOperand(0),
17578 if (!
LD->isSimple() || !N0.
hasOneUse() ||
LD->isIndexed() ||
17581 EVT FromVT =
LD->getValueType(0);
17582 EVT ToVT =
N->getValueType(0);
17589 unsigned NumElements = 0;
17594 if (NumElements == 0 ||
17605 Align Alignment =
LD->getOriginalAlign();
17626 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
17627 Alignment, MMOFlags, AAInfo);
17628 Loads.push_back(NewLoad);
17636 for (
unsigned i = 0;
i < Loads.size();
i++) {
17641 Extends.push_back(FPExt);
17662 if ((
ST->hasNEON() ||
ST->hasMVEIntegerOps()) &&
17666 EVT VT =
N->getValueType(0);
17673 isa<ConstantSDNode>(Lane)) {
17676 switch (
N->getOpcode()) {
17690 if (
ST->hasMVEIntegerOps())
17699 if (
ST->hasMVEFloatOps())
17710 if ((Subtarget->isThumb() || !Subtarget->hasV6Ops()) &&
17714 EVT VT =
Op.getValueType();
17719 !isa<ConstantSDNode>(
Op.getOperand(1)) ||
17730 APInt MaxC = Max.getConstantOperandAPInt(1);
17733 !(MinC + 1).isPowerOf2())
17751 EVT VT =
N->getValueType(0);
17757 if (!
ST->hasMVEIntegerOps())
17766 auto IsSignedSaturate = [&](
SDNode *Min,
SDNode *Max) {
17775 SaturateC =
APInt(32, (1 << 15) - 1,
true);
17777 SaturateC =
APInt(16, (1 << 7) - 1,
true);
17784 MaxC != ~SaturateC)
17789 if (IsSignedSaturate(
N, N0.
getNode())) {
17811 auto IsUnsignedSaturate = [&](
SDNode *Min) {
17818 SaturateC =
APInt(32, (1 << 16) - 1,
true);
17820 SaturateC =
APInt(16, (1 << 8) - 1,
true);
17829 if (IsUnsignedSaturate(
N)) {
17835 ExtConst = 0x0000FFFF;
17859 const APInt *CV = &
C->getAPIntValue();
17879 auto CCNode = cast<ConstantSDNode>(
CMOV->getOperand(2));
17880 auto CC = CCNode->getAPIntValue().getLimitedValue();
17917 unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
17924 if ((OrCI & Known.
Zero) != OrCI)
17930 EVT VT =
X.getValueType();
17931 unsigned BitInX = AndC->
logBase2();
17939 for (
unsigned BitInY = 0, NumActiveBits = OrCI.
getActiveBits();
17940 BitInY < NumActiveBits; ++BitInY) {
17941 if (OrCI[BitInY] == 0)
17944 Mask.setBit(BitInY);
17961 switch (
N->getOpcode()) {
17965 if (!isa<ConstantSDNode>(
N.getOperand(1)))
17967 if (!cast<ConstantSDNode>(
N.getOperand(1))->isOne())
17973 auto *Const = dyn_cast<ConstantSDNode>(
N.getOperand(1));
17976 if (Const->isZero())
17978 else if (Const->isOne())
17982 CC = cast<CondCodeSDNode>(
N.getOperand(2))->get();
17986 unsigned IntOp = cast<ConstantSDNode>(
N.getOperand(1))->getZExtValue();
17987 if (IntOp != Intrinsic::test_start_loop_iterations &&
17988 IntOp != Intrinsic::loop_decrement_reg)
18014 bool Negate =
false;
18020 Cond =
N->getOperand(1);
18021 Dest =
N->getOperand(2);
18024 CC = cast<CondCodeSDNode>(
N->getOperand(1))->get();
18025 Cond =
N->getOperand(2);
18026 Dest =
N->getOperand(4);
18027 if (
auto *Const = dyn_cast<ConstantSDNode>(
N->getOperand(3))) {
18028 if (!Const->isOne() && !Const->isZero())
18030 Imm = Const->getZExtValue();
18059 "unsupported condition");
18064 unsigned IntOp = cast<ConstantSDNode>(
Int->getOperand(1))->getZExtValue();
18066 &&
"expected single br user");
18067 SDNode *Br = *
N->use_begin();
18077 if (IntOp == Intrinsic::test_start_loop_iterations) {
18081 if (IsTrueIfZero(
CC,
Imm)) {
18082 SDValue Ops[] = {Chain, Setup, Dest};
18087 UpdateUncondBr(Br, Dest, DAG);
18089 SDValue Ops[] = {Chain, Setup, OtherTarget};
18099 cast<ConstantSDNode>(
Int.getOperand(3))->getZExtValue(), dl,
MVT::i32);
18110 if (
Target == OtherTarget)
18111 UpdateUncondBr(Br, Dest, DAG);
18130 EVT VT =
N->getValueType(0);
18145 auto *LHS00C = dyn_cast<ConstantSDNode>(
LHS->getOperand(0)->getOperand(0));
18146 auto *LHS01C = dyn_cast<ConstantSDNode>(
LHS->getOperand(0)->getOperand(1));
18147 auto *LHS1C = dyn_cast<ConstantSDNode>(
LHS->getOperand(1));
18148 auto *RHSC = dyn_cast<ConstantSDNode>(
RHS);
18149 if ((LHS00C && LHS00C->getZExtValue() == 0) &&
18150 (LHS01C && LHS01C->getZExtValue() == 1) &&
18151 (LHS1C && LHS1C->getZExtValue() == 1) &&
18152 (RHSC && RHSC->getZExtValue() == 0)) {
18155 LHS->getOperand(0)->getOperand(3),
LHS->getOperand(0)->getOperand(4));
18170 EVT VT =
N->getValueType(0);
18181 if (!Subtarget->
isThumb1Only() && Subtarget->hasV6T2Ops()) {
18207 N->getOperand(3), Cmp);
18212 N->getOperand(3), NewCmp);
18218 auto *LHS0C = dyn_cast<ConstantSDNode>(
LHS->getOperand(0));
18219 auto *LHS1C = dyn_cast<ConstantSDNode>(
LHS->getOperand(1));
18220 auto *RHSC = dyn_cast<ConstantSDNode>(
RHS);
18221 if ((LHS0C && LHS0C->getZExtValue() == 0) &&
18222 (LHS1C && LHS1C->getZExtValue() == 1) &&
18223 (RHSC && RHSC->getZExtValue() == 0)) {
18225 LHS->getOperand(2),
LHS->getOperand(3),
18226 LHS->getOperand(4));
18237 if (
N->getConstantOperandVal(2) ==
ARMCC::EQ ||
18241 if (
N->getConstantOperandVal(2) ==
ARMCC::NE)
18246 N->getOperand(3),
C);
18253 if (!Subtarget->
isThumb1Only() && Subtarget->hasV5TOps()) {
18289 N->getOperand(3), CPSRGlue.
getValue(1));
18304 N->getOperand(3), CPSRGlue.
getValue(1));
18322 const APInt *TrueConst;
18329 unsigned ShiftAmount = TrueConst->
logBase2();
18343 if (Known.
Zero == 0xfffffffe)
18346 else if (Known.
Zero == 0xffffff00)
18349 else if (Known.
Zero == 0xffff0000)
18362 EVT DstVT =
N->getValueType(0);
18365 if (
ST->hasMVEIntegerOps() && Src.getOpcode() ==
ARMISD::VDUP) {
18366 EVT SrcVT = Src.getValueType();
18374 Src = Src.getOperand(0);
18378 EVT SrcVT = Src.getValueType();
18398 EVT VT =
N->getValueType(0);
18406 if (
N->getNumOperands() == 2 &&
18410 N->getOperand(0).getOperand(1),
18411 N->getOperand(1).getOperand(0),
18412 N->getOperand(1).getOperand(1));
18415 if (
N->getNumOperands() == 2 &&
18418 auto *S0 = cast<ShuffleVectorSDNode>(
N->getOperand(0).getNode());
18419 auto *S1 = cast<ShuffleVectorSDNode>(
N->getOperand(1).getNode());
18421 if (S0->getOperand(0) == S1->getOperand(0) &&
18422 S0->getOperand(1) == S1->getOperand(1)) {
18425 Mask.append(S1->getMask().begin(), S1->getMask().end());
18445 return Op.getOpcode() == ISD::BUILD_VECTOR ||
18446 Op.getOpcode() == ISD::VECTOR_SHUFFLE ||
18447 (Op.getOpcode() == ISD::BITCAST &&
18448 Op.getOperand(0).getOpcode() == ISD::BUILD_VECTOR);
18451 for (
unsigned Op = 0;
Op <
N->getNumOperands();
Op++) {
18453 for (
unsigned i = 0;
i <
O.getValueType().getVectorNumElements();
i++) {
18456 Extracts.push_back(
Ext);
18470 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18471 int NumIns =
N->getNumOperands();
18472 assert((NumIns == 2 || NumIns == 4) &&
18473 "Expected 2 or 4 inputs to an MVETrunc");
18475 if (
N->getNumOperands() == 4)
18479 for (
int I = 0;
I < NumIns;
I++) {
18481 ISD::ADD,
DL, StackPtr.getValueType(), StackPtr,
18487 Chains.push_back(Ch);
18504 EVT FromVT =
LD->getMemoryVT();
18505 EVT ToVT =
N->getValueType(0);
18512 unsigned NumElements = 0;
18517 assert(NumElements != 0);
18523 LD->getExtensionType() != NewExtType)
18531 Align Alignment =
LD->getOriginalAlign();
18550 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
18551 Alignment, MMOFlags, AAInfo);
18552 Loads.push_back(NewLoad);
18567 EVT VT =
N->getValueType(0);
18569 assert(
N->getNumValues() == 2 &&
"Expected MVEEXT with 2 elements");
18572 EVT ExtVT =
N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18574 auto Extend = [&](
SDValue V) {
18589 if (
auto *SVN = dyn_cast<ShuffleVectorSDNode>(
N->getOperand(0))) {
18592 assert(
Mask.size() == SVN->getValueType(0).getVectorNumElements());
18597 auto CheckInregMask = [&](
int Start,
int Offset) {
18599 if (
Mask[Start + Idx] >= 0 &&
Mask[Start + Idx] != Idx * 2 +
Offset)
18605 if (CheckInregMask(0, 0))
18607 else if (CheckInregMask(0, 1))
18608 V0 = Extend(DAG.
getNode(Rev,
DL, SVN->getValueType(0), Op0));
18609 else if (CheckInregMask(0,
Mask.size()))
18611 else if (CheckInregMask(0,
Mask.size() + 1))
18612 V0 = Extend(DAG.
getNode(Rev,
DL, SVN->getValueType(0), Op1));
18617 V1 = Extend(DAG.
getNode(Rev,
DL, SVN->getValueType(0), Op1));
18621 V1 = Extend(DAG.
getNode(Rev,
DL, SVN->getValueType(0), Op0));
18628 if (
N->getOperand(0)->getOpcode() ==
ISD::LOAD)
18638 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18639 int NumOuts =
N->getNumValues();
18640 assert((NumOuts == 2 || NumOuts == 4) &&
18641 "Expected 2 or 4 outputs to an MVEEXT");
18642 EVT LoadVT =
N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18644 if (
N->getNumOperands() == 4)
18650 StackPtr, MPI,
Align(4));
18653 for (
int I = 0;
I < NumOuts;
I++) {
18655 ISD::ADD,
DL, StackPtr.getValueType(), StackPtr,
18656 DAG.
getConstant(
I * 16 / NumOuts,
DL, StackPtr.getValueType()));
18661 VT, Chain,
Ptr, MPI, LoadVT,
Align(4));
18662 Loads.push_back(
Load);
18670 switch (
N->getOpcode()) {
18788 unsigned BitWidth =
N->getValueType(0).getSizeInBits();
18795 unsigned BitWidth =
N->getValueType(0).getSizeInBits();
18806 unsigned BitWidth =
N->getValueType(0).getSizeInBits();
18814 unsigned LowWidth =
N->getOperand(0).getValueType().getSizeInBits();
18816 unsigned HighWidth =
N->getOperand(1).getValueType().getSizeInBits();
18824 unsigned HighWidth =
N->getOperand(0).getValueType().getSizeInBits();
18826 unsigned LowWidth =
N->getOperand(1).getValueType().getSizeInBits();
18834 unsigned BitWidth =
N->getValueType(0).getSizeInBits();
18845 unsigned BitWidth =
N->getValueType(0).getSizeInBits();
18854 switch (cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue()) {
18855 case Intrinsic::arm_neon_vld1:
18856 case Intrinsic::arm_neon_vld1x2:
18857 case Intrinsic::arm_neon_vld1x3:
18858 case Intrinsic::arm_neon_vld1x4:
18859 case Intrinsic::arm_neon_vld2:
18860 case Intrinsic::arm_neon_vld3:
18861 case Intrinsic::arm_neon_vld4:
18862 case Intrinsic::arm_neon_vld2lane:
18863 case Intrinsic::arm_neon_vld3lane:
18864 case Intrinsic::arm_neon_vld4lane:
18865 case Intrinsic::arm_neon_vld2dup:
18866 case Intrinsic::arm_neon_vld3dup:
18867 case Intrinsic::arm_neon_vld4dup:
18868 case Intrinsic::arm_neon_vst1:
18869 case Intrinsic::arm_neon_vst1x2:
18870 case Intrinsic::arm_neon_vst1x3:
18871 case Intrinsic::arm_neon_vst1x4:
18872 case Intrinsic::arm_neon_vst2:
18873 case Intrinsic::arm_neon_vst3:
18874 case Intrinsic::arm_neon_vst4:
18875 case Intrinsic::arm_neon_vst2lane:
18876 case Intrinsic::arm_neon_vst3lane:
18877 case Intrinsic::arm_neon_vst4lane:
18879 case Intrinsic::arm_mve_vld2q:
18880 case Intrinsic::arm_mve_vld4q:
18881 case Intrinsic::arm_mve_vst2q:
18882 case Intrinsic::arm_mve_vst4q:
18899 unsigned *Fast)
const {
18910 if (AllowsUnaligned) {
18912 *Fast = Subtarget->hasV7Ops();
18921 if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->
isLittle())) {
18928 if (!Subtarget->hasMVEIntegerOps())
18972 if ((
Op.isMemcpy() ||
Op.isZeroMemset()) && Subtarget->hasNEON() &&
18973 !FuncAttributes.
hasFnAttr(Attribute::NoImplicitFloat)) {
18975 if (
Op.size() >= 16 &&
18981 }
else if (
Op.size() >= 8 &&
19002 return (SrcBits == 64 && DestBits == 32);
19011 return (SrcBits == 64 && DestBits == 32);
19047 return Subtarget->hasFullFP16();
19057 return Ext->getType()->getScalarSizeInBits() ==
19058 2 *
Ext->getOperand(0)->getType()->getScalarSizeInBits();
19063 !areExtDoubled(cast<Instruction>(Ext1)) ||
19064 !areExtDoubled(cast<Instruction>(Ext2)))
19075 if (!
I->getType()->isVectorTy())
19078 if (Subtarget->hasNEON()) {
19079 switch (
I->getOpcode()) {
19080 case Instruction::Sub:
19084 Ops.push_back(&
I->getOperandUse(0));
19085 Ops.push_back(&
I->getOperandUse(1));
19093 if (!Subtarget->hasMVEIntegerOps())
19097 if (!
I->hasOneUse())
19099 auto *Sub = cast<Instruction>(*
I->users().begin());
19100 return Sub->getOpcode() == Instruction::FSub && Sub->getOperand(1) ==
I;
19110 switch (
I->getOpcode()) {
19113 case Instruction::FAdd:
19114 case Instruction::ICmp:
19115 case Instruction::FCmp:
19117 case Instruction::FMul:
19118 return !IsFMSMul(
I);
19119 case Instruction::Sub:
19120 case Instruction::FSub:
19121 case Instruction::Shl:
19122 case Instruction::LShr:
19123 case Instruction::AShr:
19124 return Operand == 1;
19126 if (
auto *II = dyn_cast<IntrinsicInst>(
I)) {
19127 switch (II->getIntrinsicID()) {
19128 case Intrinsic::fma:
19130 case Intrinsic::sadd_sat:
19131 case Intrinsic::uadd_sat:
19132 case Intrinsic::arm_mve_add_predicated:
19133 case Intrinsic::arm_mve_mul_predicated:
19134 case Intrinsic::arm_mve_qadd_predicated:
19135 case Intrinsic::arm_mve_vhadd:
19136 case Intrinsic::arm_mve_hadd_predicated:
19137 case Intrinsic::arm_mve_vqdmull:
19138 case Intrinsic::arm_mve_vqdmull_predicated:
19139 case Intrinsic::arm_mve_vqdmulh:
19140 case Intrinsic::arm_mve_qdmulh_predicated:
19141 case Intrinsic::arm_mve_vqrdmulh:
19142 case Intrinsic::arm_mve_qrdmulh_predicated:
19143 case Intrinsic::arm_mve_fma_predicated:
19145 case Intrinsic::ssub_sat:
19146 case Intrinsic::usub_sat:
19147 case Intrinsic::arm_mve_sub_predicated:
19148 case Intrinsic::arm_mve_qsub_predicated:
19149 case Intrinsic::arm_mve_hsub_predicated:
19150 case Intrinsic::arm_mve_vhsub:
19151 return Operand == 1;
19162 for (
auto OpIdx :
enumerate(
I->operands())) {
19163 Instruction *
Op = dyn_cast<Instruction>(OpIdx.value().get());
19165 if (!
Op ||
any_of(Ops, [&](
Use *U) {
return U->get() ==
Op; }))
19169 if (
Shuffle->getOpcode() == Instruction::BitCast)
19177 if (!IsSinker(
I, OpIdx.index()))
19182 for (
Use &U :
Op->uses()) {
19188 Ops.push_back(&
Shuffle->getOperandUse(0));
19190 Ops.push_back(&
Op->getOperandUse(0));
19191 Ops.push_back(&OpIdx.value());
19197 if (!Subtarget->hasMVEIntegerOps())
19215 if (
auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal.
getOperand(0))) {
19216 if (Ld->isExpandingLoad())
19220 if (Subtarget->hasMVEIntegerOps())
19265 bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(
const MachineFunction &MF,
19273 return Subtarget->hasMVEFloatOps();
19291 unsigned Scale = 1;
19308 if ((V & (Scale - 1)) != 0)
19310 return isUInt<5>(V / Scale);
19317 if (VT.
isVector() && Subtarget->hasNEON())
19320 !Subtarget->hasMVEFloatOps())
19323 bool IsNeg =
false;
19332 if (VT.
isVector() && Subtarget->hasMVEIntegerOps()) {
19336 return isShiftedUInt<7,2>(V);
19339 return isShiftedUInt<7,1>(V);
19341 return isUInt<7>(V);
19348 if (VT.
isFloatingPoint() && NumBytes == 2 && Subtarget->hasFPRegs16())
19349 return isShiftedUInt<8, 1>(V);
19352 return isShiftedUInt<8, 2>(V);
19354 if (NumBytes == 1 || NumBytes == 2 || NumBytes == 4) {
19357 return isUInt<8>(V);
19358 return isUInt<12>(V);
19384 default:
return false;
19389 return isUInt<12>(V);
19392 return isUInt<8>(V);
19397 return isShiftedUInt<8, 2>(V);
19403 int Scale = AM.
Scale;
19408 default:
return false;
19416 Scale = Scale & ~1;
19417 return Scale == 2 || Scale == 4 || Scale == 8;
19434 if (Scale & 1)
return false;
19441 const int Scale = AM.
Scale;
19451 return (Scale == 1) || (!AM.
HasBaseReg && Scale == 2);
19467 switch (AM.
Scale) {
19484 int Scale = AM.
Scale;
19486 default:
return false;
19490 if (Scale < 0) Scale = -Scale;
19498 if (Scale == 1 || (AM.
HasBaseReg && Scale == -1))
19511 if (Scale & 1)
return false;
19524 if (!Subtarget->isThumb())
19531 return Imm >= 0 &&
Imm <= 255;
19541 if (!Subtarget->isThumb())
19546 return AbsImm >= 0 && AbsImm <= 255;
19585 int RHSC = (
int)
RHS->getZExtValue();
19586 if (RHSC < 0 && RHSC > -256) {
19599 int RHSC = (
int)
RHS->getZExtValue();
19600 if (RHSC < 0 && RHSC > -0x1000) {
19642 int RHSC = (
int)
RHS->getZExtValue();
19643 if (RHSC < 0 && RHSC > -0x100) {
19648 }
else if (RHSC > 0 && RHSC < 0x100) {
19664 if (!isa<ConstantSDNode>(
Ptr->getOperand(1)))
19670 bool CanChangeType = isLE && !IsMasked;
19673 int RHSC = (
int)
RHS->getZExtValue();
19675 auto IsInRange = [&](
int RHSC,
int Limit,
int Scale) {
19676 if (RHSC < 0 && RHSC > -Limit * Scale && RHSC % Scale == 0) {
19681 }
else if (RHSC > 0 && RHSC < Limit * Scale && RHSC % Scale == 0) {
19693 if (Alignment >= 2 && IsInRange(RHSC, 0x80, 2))
19696 if (IsInRange(RHSC, 0x80, 1))
19698 }
else if (Alignment >= 4 &&
19700 IsInRange(RHSC, 0x80, 4))
19702 else if (Alignment >= 2 &&
19704 IsInRange(RHSC, 0x80, 2))
19706 else if ((CanChangeType || VT ==
MVT::v16i8) && IsInRange(RHSC, 0x80, 1))
19726 bool IsMasked =
false;
19728 Ptr =
LD->getBasePtr();
19729 VT =
LD->getMemoryVT();
19730 Alignment =
LD->getAlign();
19733 Ptr =
ST->getBasePtr();
19734 VT =
ST->getMemoryVT();
19735 Alignment =
ST->getAlign();
19737 Ptr =
LD->getBasePtr();
19738 VT =
LD->getMemoryVT();
19739 Alignment =
LD->getAlign();
19743 Ptr =
ST->getBasePtr();
19744 VT =
ST->getMemoryVT();
19745 Alignment =
ST->getAlign();
19751 bool isLegal =
false;
19753 isLegal = Subtarget->hasMVEIntegerOps() &&
19784 bool IsMasked =
false;
19786 VT =
LD->getMemoryVT();
19787 Ptr =
LD->getBasePtr();
19788 Alignment =
LD->getAlign();
19792 VT =
ST->getMemoryVT();
19793 Ptr =
ST->getBasePtr();
19794 Alignment =
ST->getAlign();
19795 isNonExt = !
ST->isTruncatingStore();
19797 VT =
LD->getMemoryVT();
19798 Ptr =
LD->getBasePtr();
19799 Alignment =
LD->getAlign();
19804 VT =
ST->getMemoryVT();
19805 Ptr =
ST->getBasePtr();
19806 Alignment =
ST->getAlign();
19807 isNonExt = !
ST->isTruncatingStore();
19816 if (
Op->getOpcode() !=
ISD::ADD || !isNonExt)
19818 auto *
RHS = dyn_cast<ConstantSDNode>(
Op->getOperand(1));
19819 if (!
RHS ||
RHS->getZExtValue() != 4)
19821 if (Alignment <
Align(4))
19825 Base =
Op->getOperand(0);
19831 bool isLegal =
false;
19833 isLegal = Subtarget->hasMVEIntegerOps() &&
19866 const APInt &DemandedElts,
19868 unsigned Depth)
const {
19871 switch (
Op.getOpcode()) {
19878 if (
Op.getResNo() == 0) {
19904 case Intrinsic::arm_ldaex:
19905 case Intrinsic::arm_ldrex: {
19906 EVT VT = cast<MemIntrinsicSDNode>(
Op)->getMemoryVT();
19928 const SDValue &SrcSV =
Op.getOperand(0);
19934 "VGETLANE index out of bounds");
19939 EVT VT =
Op.getValueType();
19946 Known = Known.
sext(DstSz);
19948 Known = Known.
zext(DstSz);
19956 Known = KnownOp.
zext(32);
19996 EVT VT =
Op.getValueType();
20009 unsigned Mask =
C->getZExtValue();
20012 unsigned ShrunkMask =
Mask & Demanded;
20013 unsigned ExpandedMask =
Mask | ~Demanded;
20017 if (ShrunkMask == 0)
20023 if (ExpandedMask == ~0U)
20026 auto IsLegalMask = [ShrunkMask, ExpandedMask](
unsigned Mask) ->
bool {
20027 return (ShrunkMask &
Mask) == ShrunkMask && (~ExpandedMask &
Mask) == 0;
20029 auto UseMask = [
Mask,
Op, VT, &TLO](
unsigned NewMask) ->
bool {
20030 if (NewMask ==
Mask)
20039 if (IsLegalMask(0xFF))
20040 return UseMask(0xFF);
20043 if (IsLegalMask(0xFFFF))
20044 return UseMask(0xFFFF);
20048 if (ShrunkMask < 256)
20049 return UseMask(ShrunkMask);
20053 if ((
int)ExpandedMask <= -2 && (
int)ExpandedMask >= -256)
20054 return UseMask(ExpandedMask);
20069 unsigned Depth)
const {
20070 unsigned Opc =
Op.getOpcode();
20078 if (
Op.getResNo() == 0 && !
Op->hasAnyUseOfValue(1) &&
20079 isa<ConstantSDNode>(
Op->getOperand(2))) {
20080 unsigned ShAmt =
Op->getConstantOperandVal(2);
20092 unsigned ModImm =
Op.getConstantOperandVal(1);
20093 unsigned EltBits = 0;
20095 if ((OriginalDemandedBits &
Mask) == 0)
20101 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO,
Depth);
20110 if (!Subtarget->hasV6Ops())
20114 std::string AsmStr =
IA->getAsmString();
20116 SplitString(AsmStr, AsmPieces,
";\n");
20118 switch (AsmPieces.size()) {
20119 default:
return false;
20121 AsmStr = std::string(AsmPieces[0]);
20123 SplitString(AsmStr, AsmPieces,
" \t,");
20126 if (AsmPieces.size() == 3 &&
20127 AsmPieces[0] ==
"rev" && AsmPieces[1] ==
"$0" && AsmPieces[2] ==
"$1" &&
20128 IA->getConstraintString().compare(0, 4,
"=l,l") == 0) {
20151 if (ConstraintVT.
isVector() && Subtarget->hasNEON() &&
20163 unsigned S = Constraint.
size();
20165 switch (Constraint[0]) {
20177 }
else if (
S == 2) {
20178 switch (Constraint[0]) {
20195 Value *CallOperandVal =
info.CallOperandVal;
20198 if (!CallOperandVal)
20202 switch (*constraint) {
20207 if (
type->isIntegerTy()) {
20208 if (Subtarget->isThumb())
20215 if (
type->isFloatingPointTy())
20222 using RCPair = std::pair<unsigned, const TargetRegisterClass *>;
20226 switch (Constraint.
size()) {
20229 switch (Constraint[0]) {
20231 if (Subtarget->isThumb())
20232 return RCPair(0U, &ARM::tGPRRegClass);
20233 return RCPair(0U, &ARM::GPRRegClass);
20235 if (Subtarget->isThumb())
20236 return RCPair(0U, &ARM::hGPRRegClass);
20240 return RCPair(0U, &ARM::tGPRRegClass);
20241 return RCPair(0U, &ARM::GPRRegClass);
20246 return RCPair(0U, &ARM::HPRRegClass);
20248 return RCPair(0U, &ARM::SPRRegClass);
20250 return RCPair(0U, &ARM::DPRRegClass);
20252 return RCPair(0U, &ARM::QPRRegClass);
20258 return RCPair(0U, &ARM::SPR_8RegClass);
20260 return RCPair(0U, &ARM::DPR_8RegClass);
20262 return RCPair(0U, &ARM::QPR_8RegClass);
20268 return RCPair(0U, &ARM::HPRRegClass);
20270 return RCPair(0U, &ARM::SPRRegClass);
20272 return RCPair(0U, &ARM::DPR_VFP2RegClass);
20274 return RCPair(0U, &ARM::QPR_VFP2RegClass);
20280 if (Constraint[0] ==
'T') {
20281 switch (Constraint[1]) {
20285 return RCPair(0U, &ARM::tGPREvenRegClass);
20287 return RCPair(0U, &ARM::tGPROddRegClass);
20296 if (
StringRef(
"{cc}").equals_insensitive(Constraint))
20297 return std::make_pair(
unsigned(ARM::CPSR), &ARM::CCRRegClass);
20305 std::string &Constraint,
20306 std::vector<SDValue>&Ops,
20311 if (Constraint.length() != 1)
return;
20313 char ConstraintLetter = Constraint[0];
20314 switch (ConstraintLetter) {
20317 case 'I':
case 'J':
case 'K':
case 'L':
20318 case 'M':
case 'N':
case 'O':
20323 int64_t CVal64 =
C->getSExtValue();
20324 int CVal = (
int) CVal64;
20327 if (CVal != CVal64)
20330 switch (ConstraintLetter) {
20334 if (Subtarget->hasV6T2Ops() || (Subtarget->hasV8MBaselineOps()))
20335 if (CVal >= 0 && CVal <= 65535)
20342 if (CVal >= 0 && CVal <= 255)
20344 }
else if (Subtarget->
isThumb2()) {
20363 if (CVal >= -255 && CVal <= -1)
20369 if (CVal >= -4095 && CVal <= 4095)
20382 }
else if (Subtarget->
isThumb2()) {
20405 if (CVal >= -7 && CVal < 7)
20407 }
else if (Subtarget->
isThumb2()) {
20430 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
20436 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
20444 if (CVal >= 0 && CVal <= 31)
20453 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
20462 if (Result.getNode()) {
20463 Ops.push_back(Result);
20473 "Unhandled Opcode in getDivRemLibcall");
20479 case MVT::i8: LC =
isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8;
break;
20480 case MVT::i16: LC =
isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16;
break;
20481 case MVT::i32: LC =
isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;
break;
20482 case MVT::i64: LC =
isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64;
break;
20491 "Unhandled Opcode in getDivRemArgList");
20495 TargetLowering::ArgListEntry Entry;
20496 for (
unsigned i = 0,
e =
N->getNumOperands();
i !=
e; ++
i) {
20497 EVT ArgVT =
N->getOperand(
i).getValueType();
20499 Entry.Node =
N->getOperand(
i);
20503 Args.push_back(Entry);
20514 "Register-based DivRem lowering only");
20515 unsigned Opcode =
Op->getOpcode();
20517 "Invalid opcode for Div/Rem lowering");
20519 EVT VT =
Op->getValueType(0);
20522 if (VT ==
MVT::i64 && isa<ConstantSDNode>(
Op.getOperand(1))) {
20541 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
20542 : Subtarget->hasDivideInARMMode();
20543 if (hasDivide &&
Op->getValueType(0).isSimple() &&
20546 const SDValue Dividend =
Op->getOperand(0);
20547 const SDValue Divisor =
Op->getOperand(1);
20548 SDValue Div = DAG.
getNode(DivOpcode, dl, VT, Dividend, Divisor);
20552 SDValue Values[2] = {Div, Rem};
20584 EVT VT =
N->getValueType(0);
20586 if (VT ==
MVT::i64 && isa<ConstantSDNode>(
N->getOperand(1))) {
20590 Result[0], Result[1]);
20594 std::vector<Type*> RetTyParams;
20595 Type *RetTyElement;
20605 RetTyParams.push_back(RetTyElement);
20606 RetTyParams.push_back(RetTyElement);
20623 CallLoweringInfo CLI(DAG);
20627 std::pair<SDValue, SDValue> CallResult =
LowerCallTo(CLI);
20630 SDNode *ResNode = CallResult.first.getNode();
20645 "no-stack-arg-probe")) {
20647 cast<ConstantSDNode>(
Op.getOperand(2))->getMaybeAlignValue();
20656 SDValue Ops[2] = { SP, Chain };
20673 SDValue Ops[2] = { NewSP, Chain };
20678 bool IsStrict =
Op->isStrictFPOpcode();
20679 SDValue SrcVal =
Op.getOperand(IsStrict ? 1 : 0);
20680 const unsigned DstSz =
Op.getValueType().getSizeInBits();
20682 assert(DstSz > SrcSz && DstSz <= 64 && SrcSz >= 16 &&
20683 "Unexpected type for custom-lowering FP_EXTEND");
20686 "With both FP DP and 16, any FP conversion is legal!");
20688 assert(!(DstSz == 32 && Subtarget->hasFP16()) &&
20689 "With FP16, 16 to 32 conversion is legal!");
20692 if (SrcSz == 32 && DstSz == 64 && Subtarget->hasFP64()) {
20697 Loc,
Op.getValueType(), SrcVal);
20710 MakeLibCallOptions CallOptions;
20712 for (
unsigned Sz = SrcSz; Sz <= 32 && Sz < DstSz; Sz *= 2) {
20713 bool Supported = (Sz == 16 ? Subtarget->hasFP16() : Subtarget->hasFP64());
20726 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20727 "Unexpected type for custom-lowering FP_EXTEND");
20728 std::tie(SrcVal, Chain) =
makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20733 return IsStrict ? DAG.
getMergeValues({SrcVal, Chain}, Loc) : SrcVal;
20737 bool IsStrict =
Op->isStrictFPOpcode();
20739 SDValue SrcVal =
Op.getOperand(IsStrict ? 1 : 0);
20741 EVT DstVT =
Op.getValueType();
20742 const unsigned DstSz =
Op.getValueType().getSizeInBits();
20745 assert(DstSz < SrcSz && SrcSz <= 64 && DstSz >= 16 &&
20746 "Unexpected type for custom-lowering FP_ROUND");
20749 "With both FP DP and 16, any FP conversion is legal!");
20754 if (SrcSz == 32 && Subtarget->hasFP16())
20759 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20760 "Unexpected type for custom-lowering FP_ROUND");
20761 MakeLibCallOptions CallOptions;
20764 std::tie(Result, Chain) =
makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20776 if (v == 0xffffffff)
20788 bool ForCodeSize)
const {
20791 if (VT ==
MVT::f16 && Subtarget->hasFullFP16())
20793 if (VT ==
MVT::f32 && Subtarget->hasFullFP16() &&
20798 if (VT ==
MVT::f64 && Subtarget->hasFP64())
20809 unsigned Intrinsic)
const {
20810 switch (Intrinsic) {
20811 case Intrinsic::arm_neon_vld1:
20812 case Intrinsic::arm_neon_vld2:
20813 case Intrinsic::arm_neon_vld3:
20814 case Intrinsic::arm_neon_vld4:
20815 case Intrinsic::arm_neon_vld2lane:
20816 case Intrinsic::arm_neon_vld3lane:
20817 case Intrinsic::arm_neon_vld4lane:
20818 case Intrinsic::arm_neon_vld2dup:
20819 case Intrinsic::arm_neon_vld3dup:
20820 case Intrinsic::arm_neon_vld4dup: {
20823 auto &
DL =
I.getCalledFunction()->getParent()->getDataLayout();
20824 uint64_t NumElts =
DL.getTypeSizeInBits(
I.getType()) / 64;
20826 Info.ptrVal =
I.getArgOperand(0);
20828 Value *AlignArg =
I.getArgOperand(
I.arg_size() - 1);
20829 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
20834 case Intrinsic::arm_neon_vld1x2:
20835 case Intrinsic::arm_neon_vld1x3:
20836 case Intrinsic::arm_neon_vld1x4: {
20839 auto &
DL =
I.getCalledFunction()->getParent()->getDataLayout();
20840 uint64_t NumElts =
DL.getTypeSizeInBits(
I.getType()) / 64;
20842 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
20844 Info.align.reset();
20849 case Intrinsic::arm_neon_vst1:
20850 case Intrinsic::arm_neon_vst2:
20851 case Intrinsic::arm_neon_vst3:
20852 case Intrinsic::arm_neon_vst4:
20853 case Intrinsic::arm_neon_vst2lane:
20854 case Intrinsic::arm_neon_vst3lane:
20855 case Intrinsic::arm_neon_vst4lane: {
20858 auto &
DL =
I.getCalledFunction()->getParent()->getDataLayout();
20859 unsigned NumElts = 0;
20860 for (
unsigned ArgI = 1, ArgE =
I.arg_size(); ArgI < ArgE; ++ArgI) {
20861 Type *ArgTy =
I.getArgOperand(ArgI)->getType();
20864 NumElts +=
DL.getTypeSizeInBits(ArgTy) / 64;
20867 Info.ptrVal =
I.getArgOperand(0);
20869 Value *AlignArg =
I.getArgOperand(
I.arg_size() - 1);
20870 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
20875 case Intrinsic::arm_neon_vst1x2:
20876 case Intrinsic::arm_neon_vst1x3:
20877 case Intrinsic::arm_neon_vst1x4: {
20880 auto &
DL =
I.getCalledFunction()->getParent()->getDataLayout();
20881 unsigned NumElts = 0;
20882 for (
unsigned ArgI = 1, ArgE =
I.arg_size(); ArgI < ArgE; ++ArgI) {
20883 Type *ArgTy =
I.getArgOperand(ArgI)->getType();
20886 NumElts +=
DL.getTypeSizeInBits(ArgTy) / 64;
20889 Info.ptrVal =
I.getArgOperand(0);
20891 Info.align.reset();
20896 case Intrinsic::arm_mve_vld2q:
20897 case Intrinsic::arm_mve_vld4q: {
20900 Type *VecTy = cast<StructType>(
I.getType())->getElementType(1);
20901 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vld2q ? 2 : 4;
20903 Info.ptrVal =
I.getArgOperand(0);
20910 case Intrinsic::arm_mve_vst2q:
20911 case Intrinsic::arm_mve_vst4q: {
20914 Type *VecTy =
I.getArgOperand(1)->getType();
20915 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vst2q ? 2 : 4;
20917 Info.ptrVal =
I.getArgOperand(0);
20924 case Intrinsic::arm_mve_vldr_gather_base:
20925 case Intrinsic::arm_mve_vldr_gather_base_predicated: {
20927 Info.ptrVal =
nullptr;
20933 case Intrinsic::arm_mve_vldr_gather_base_wb:
20934 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
20936 Info.ptrVal =
nullptr;
20942 case Intrinsic::arm_mve_vldr_gather_offset:
20943 case Intrinsic::arm_mve_vldr_gather_offset_predicated: {
20945 Info.ptrVal =
nullptr;
20947 unsigned MemSize = cast<ConstantInt>(
I.getArgOperand(2))->getZExtValue();
20954 case Intrinsic::arm_mve_vstr_scatter_base:
20955 case Intrinsic::arm_mve_vstr_scatter_base_predicated: {
20957 Info.ptrVal =
nullptr;
20963 case Intrinsic::arm_mve_vstr_scatter_base_wb:
20964 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated: {
20966 Info.ptrVal =
nullptr;
20972 case Intrinsic::arm_mve_vstr_scatter_offset:
20973 case Intrinsic::arm_mve_vstr_scatter_offset_predicated: {
20975 Info.ptrVal =
nullptr;
20977 unsigned MemSize = cast<ConstantInt>(
I.getArgOperand(3))->getZExtValue();
20984 case Intrinsic::arm_ldaex:
20985 case Intrinsic::arm_ldrex: {
20986 auto &
DL =
I.getCalledFunction()->getParent()->getDataLayout();
20987 Type *ValTy =
I.getParamElementType(0);
20990 Info.ptrVal =
I.getArgOperand(0);
20992 Info.align =
DL.getABITypeAlign(ValTy);
20996 case Intrinsic::arm_stlex:
20997 case Intrinsic::arm_strex: {
20998 auto &
DL =
I.getCalledFunction()->getParent()->getDataLayout();
20999 Type *ValTy =
I.getParamElementType(1);
21002 Info.ptrVal =
I.getArgOperand(1);
21004 Info.align =
DL.getABITypeAlign(ValTy);
21008 case Intrinsic::arm_stlexd:
21009 case Intrinsic::arm_strexd:
21012 Info.ptrVal =
I.getArgOperand(2);
21018 case Intrinsic::arm_ldaexd:
21019 case Intrinsic::arm_ldrexd:
21022 Info.ptrVal =
I.getArgOperand(0);
21048 unsigned Index)
const {
21060 if (!Subtarget->hasDataBarrier()) {
21064 if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
21080 return Builder.CreateCall(DMB, CDomain);
21101 if (Subtarget->preferISHSTBarriers())
21134 bool has64BitAtomicStore;
21136 has64BitAtomicStore =
false;
21137 else if (Subtarget->isThumb())
21138 has64BitAtomicStore = Subtarget->hasV7Ops();
21140 has64BitAtomicStore = Subtarget->hasV6Ops();
21142 unsigned Size =
SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
21156 bool has64BitAtomicLoad;
21158 has64BitAtomicLoad =
false;
21159 else if (Subtarget->isThumb())
21160 has64BitAtomicLoad = Subtarget->hasV7Ops();
21162 has64BitAtomicLoad = Subtarget->hasV6Ops();
21179 hasAtomicRMW = Subtarget->hasV8MBaselineOps();
21180 else if (Subtarget->isThumb())
21181 hasAtomicRMW = Subtarget->hasV7Ops();
21183 hasAtomicRMW = Subtarget->hasV6Ops();
21184 if (Size <= (Subtarget->
isMClass() ? 32U : 64U) && hasAtomicRMW) {
21207 bool HasAtomicCmpXchg;
21209 HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps();
21210 else if (Subtarget->isThumb())
21211 HasAtomicCmpXchg = Subtarget->hasV7Ops();
21213 HasAtomicCmpXchg = Subtarget->hasV6Ops();
21215 Size <= (Subtarget->
isMClass() ? 32U : 64U))
21222 return InsertFencesForAtomic;
21235 M.getOrInsertGlobal(
"__security_cookie",
21243 F->addParamAttr(0, Attribute::AttrKind::InReg);
21249 return M.getGlobalVariable(
"__security_cookie");
21256 return M.getFunction(
"__security_check_cookie");
21261 unsigned &Cost)
const {
21263 if (!Subtarget->hasNEON())
21275 if (!isa<ConstantInt>(Idx))
21290 return Subtarget->hasV6T2Ops();
21294 return Subtarget->hasV6T2Ops();
21299 if (!Subtarget->hasV7Ops())
21305 if (!
Mask ||
Mask->getValue().getBitWidth() > 32u)
21307 auto MaskVal =
unsigned(
Mask->getValue().getZExtValue());
21332 IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
21338 Value *Lo =
Builder.CreateExtractValue(LoHi, 0,
"lo");
21339 Value *Hi =
Builder.CreateExtractValue(LoHi, 1,
"hi");
21342 Lo =
Builder.CreateZExt(Lo, ValueTy,
"lo64");
21343 Hi =
Builder.CreateZExt(Hi, ValueTy,
"hi64");
21348 Type *Tys[] = {
Addr->getType() };
21349 Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
21354 0,
Attribute::get(
M->getContext(), Attribute::ElementType, ValueTy));
21355 return Builder.CreateTruncOrBitCast(CI, ValueTy);
21360 if (!Subtarget->hasV7Ops())
21377 IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
21386 return Builder.CreateCall(Strex, {Lo, Hi,
Addr});
21389 Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
21390 Type *Tys[] = {
Addr->getType() };
21394 Strex, {
Builder.CreateZExtOrBitCast(
21412 return (
DL.getTypeSizeInBits(VecTy) + 127) / 128;
21419 unsigned VecSize =
DL.getTypeSizeInBits(VecTy);
21422 if (!Subtarget->hasNEON() && !Subtarget->hasMVEIntegerOps())
21430 if (Subtarget->hasMVEIntegerOps() && Factor == 3)
21438 if (ElSize != 8 && ElSize != 16 && ElSize != 32)
21441 if (Subtarget->hasMVEIntegerOps() && Alignment < ElSize / 8)
21446 if (Subtarget->hasNEON() &&
VecSize == 64)
21452 if (Subtarget->hasNEON())
21454 if (Subtarget->hasMVEIntegerOps())
21474 "Invalid interleave factor");
21475 assert(!Shuffles.
empty() &&
"Empty shufflevector input");
21477 "Unmatched number of shufflevectors and indices");
21479 auto *VecTy = cast<FixedVectorType>(Shuffles[0]->
getType());
21480 Type *EltTy = VecTy->getElementType();
21503 if (NumLoads > 1) {
21507 VecTy->getNumElements() / NumLoads);
21512 BaseAddr =
Builder.CreateBitCast(
21519 auto createLoadIntrinsic = [&](
Value *BaseAddr) {
21520 if (Subtarget->hasNEON()) {
21522 Type *Tys[] = {VecTy, Int8Ptr};
21523 static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
21524 Intrinsic::arm_neon_vld3,
21525 Intrinsic::arm_neon_vld4};
21530 Ops.push_back(
Builder.CreateBitCast(BaseAddr, Int8Ptr));
21533 return Builder.CreateCall(VldnFunc, Ops,
"vldN");
21535 assert((Factor == 2 || Factor == 4) &&
21536 "expected interleave factor of 2 or 4 for MVE");
21538 Factor == 2 ? Intrinsic::arm_mve_vld2q : Intrinsic::arm_mve_vld4q;
21541 Type *Tys[] = {VecTy, VecEltTy};
21546 Ops.push_back(
Builder.CreateBitCast(BaseAddr, VecEltTy));
21547 return Builder.CreateCall(VldnFunc, Ops,
"vldN");
21556 for (
unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
21560 BaseAddr =
Builder.CreateConstGEP1_32(VecTy->getElementType(), BaseAddr,
21561 VecTy->getNumElements() * Factor);
21563 CallInst *VldN = createLoadIntrinsic(BaseAddr);
21567 for (
unsigned i = 0;
i < Shuffles.
size();
i++) {
21569 unsigned Index = Indices[
i];
21575 SubVec =
Builder.CreateIntToPtr(
21579 SubVecs[SV].push_back(SubVec);
21588 auto &SubVec = SubVecs[SVI];
21591 SVI->replaceAllUsesWith(WideVec);
21625 unsigned Factor)
const {
21627 "Invalid interleave factor");
21629 auto *VecTy = cast<FixedVectorType>(SVI->
getType());
21630 assert(VecTy->getNumElements() % Factor == 0 &&
"Invalid interleaved store");
21632 unsigned LaneLen = VecTy->getNumElements() / Factor;
21633 Type *EltTy = VecTy->getElementType();
21637 Align Alignment =
SI->getAlign();
21654 Type *IntTy =
DL.getIntPtrType(EltTy);
21659 Op0 =
Builder.CreatePtrToInt(Op0, IntVecTy);
21660 Op1 =
Builder.CreatePtrToInt(Op1, IntVecTy);
21666 Value *BaseAddr =
SI->getPointerOperand();
21668 if (NumStores > 1) {
21671 LaneLen /= NumStores;
21677 BaseAddr =
Builder.CreateBitCast(
21679 SubVecTy->getElementType()->getPointerTo(
SI->getPointerAddressSpace()));
21686 auto createStoreIntrinsic = [&](
Value *BaseAddr,
21688 if (Subtarget->hasNEON()) {
21689 static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
21690 Intrinsic::arm_neon_vst3,
21691 Intrinsic::arm_neon_vst4};
21692 Type *Int8Ptr =
Builder.getInt8PtrTy(
SI->getPointerAddressSpace());
21693 Type *Tys[] = {Int8Ptr, SubVecTy};
21696 SI->getModule(), StoreInts[Factor - 2], Tys);
21699 Ops.push_back(
Builder.CreateBitCast(BaseAddr, Int8Ptr));
21701 Ops.push_back(
Builder.getInt32(
SI->getAlign().value()));
21702 Builder.CreateCall(VstNFunc, Ops);
21704 assert((Factor == 2 || Factor == 4) &&
21705 "expected interleave factor of 2 or 4 for MVE");
21707 Factor == 2 ? Intrinsic::arm_mve_vst2q : Intrinsic::arm_mve_vst4q;
21709 SI->getPointerAddressSpace());
21710 Type *Tys[] = {EltPtrTy, SubVecTy};
21715 Ops.push_back(
Builder.CreateBitCast(BaseAddr, EltPtrTy));
21717 for (
unsigned F = 0;
F < Factor;
F++) {
21718 Ops.push_back(
Builder.getInt32(
F));
21719 Builder.CreateCall(VstNFunc, Ops);
21725 for (
unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
21728 if (StoreCount > 0)
21729 BaseAddr =
Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
21730 BaseAddr, LaneLen * Factor);
21735 for (
unsigned i = 0;
i < Factor;
i++) {
21736 unsigned IdxI = StoreCount * LaneLen * Factor +
i;
21737 if (
Mask[IdxI] >= 0) {
21738 Shuffles.push_back(
Builder.CreateShuffleVector(
21741 unsigned StartMask = 0;
21742 for (
unsigned j = 1;
j < LaneLen;
j++) {
21743 unsigned IdxJ = StoreCount * LaneLen * Factor +
j;
21744 if (
Mask[IdxJ * Factor + IdxI] >= 0) {
21745 StartMask =
Mask[IdxJ * Factor + IdxI] - IdxJ;
21755 Shuffles.push_back(
Builder.CreateShuffleVector(
21760 createStoreIntrinsic(BaseAddr, Shuffles);
21775 if (
auto *
ST = dyn_cast<StructType>(Ty)) {
21776 for (
unsigned i = 0;
i <
ST->getNumElements(); ++
i) {
21780 Members += SubMembers;
21782 }
else if (
auto *AT = dyn_cast<ArrayType>(Ty)) {
21786 Members += SubMembers * AT->getNumElements();
21797 }
else if (
auto *VT = dyn_cast<VectorType>(Ty)) {
21804 return VT->getPrimitiveSizeInBits().getFixedValue() == 64;
21806 return VT->getPrimitiveSizeInBits().getFixedValue() == 128;
21808 switch (VT->getPrimitiveSizeInBits().getFixedValue()) {
21821 return (Members > 0 && Members <= 4);
21827 const Align ABITypeAlign =
DL.getABITypeAlign(ArgTy);
21829 return ABITypeAlign;
21833 return std::min(ABITypeAlign,
DL.getStackAlignment());
21842 if (getEffectiveCallingConv(CallConv, isVarArg) !=
21852 return IsHA || IsIntArray;
21856 const Constant *PersonalityFn)
const {
21863 const Constant *PersonalityFn)
const {
21875 void ARMTargetLowering::insertCopiesSplitCSR(
21879 const MCPhysReg *IStart =
TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
21889 RC = &ARM::GPRRegClass;
21890 else if (ARM::DPRRegClass.
contains(*
I))
21891 RC = &ARM::DPRRegClass;
21901 assert(Entry->getParent()->getFunction().hasFnAttribute(
21902 Attribute::NoUnwind) &&
21903 "Function should be nounwind in insertCopiesSplitCSR!");
21904 Entry->addLiveIn(*
I);
21909 for (
auto *Exit : Exits)
21911 TII->get(TargetOpcode::COPY), *
I)
21922 return Subtarget->hasMVEIntegerOps();
21927 auto *VTy = dyn_cast<FixedVectorType>(Ty);
21932 unsigned NumElements = VTy->getNumElements();
21939 if (ScalarTy->isHalfTy() || ScalarTy->isFloatTy())
21940 return Subtarget->hasMVEFloatOps();
21945 return Subtarget->hasMVEIntegerOps() &&
21946 (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
21947 ScalarTy->isIntegerTy(32));
21953 Value *Accumulator)
const {
21961 assert(TyWidth >= 128 &&
"Width of vector type must be at least 128 bits");
21963 if (TyWidth > 128) {
21968 ArrayRef<int> UpperSplitMask(&SplitSeqVec[Stride], Stride);
21970 auto *LowerSplitA =
B.CreateShuffleVector(InputA, LowerSplitMask);
21971 auto *LowerSplitB =
B.CreateShuffleVector(InputB, LowerSplitMask);
21972 auto *UpperSplitA =
B.CreateShuffleVector(InputA, UpperSplitMask);
21973 auto *UpperSplitB =
B.CreateShuffleVector(InputB, UpperSplitMask);
21974 Value *LowerSplitAcc =
nullptr;
21975 Value *UpperSplitAcc =
nullptr;
21978 LowerSplitAcc =
B.CreateShuffleVector(
Accumulator, LowerSplitMask);
21979 UpperSplitAcc =
B.CreateShuffleVector(
Accumulator, UpperSplitMask);
21983 I, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc);
21985 I, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc);
21988 return B.CreateShuffleVector(LowerSplitInt, UpperSplitInt, JoinMask);
21998 return B.CreateIntrinsic(Intrinsic::arm_mve_vcmlaq, Ty,
22000 return B.CreateIntrinsic(Intrinsic::arm_mve_vcmulq, Ty,
22001 {ConstRotation, InputB, InputA});
22013 if (!ConstRotation)
22016 return B.CreateIntrinsic(Intrinsic::arm_mve_vcaddq, Ty,
22017 {ConstHalving, ConstRotation, InputA, InputB});